Assorted md fixes for 3.10
A mixed bad of little fixes. No real new functionality here. Several patches are tagged for -stable. -----BEGIN PGP SIGNATURE----- Version: GnuPG v2.0.19 (GNU/Linux) iQIVAwUAUYBTNTnsnt1WYoG5AQKn2w/7BQDRB5pNfOe+Omb7cwO8QKIWIn2RvJ1H 8ki0+folaqX9OOpWZUb1KEKhOnJSMPL6c6NS5a358aQRgiKJBRQ5Qoe4nz9v7BuY SQnO8VjNrsHeEZ7iH1krtx0yNUklinO5j5HxEnNp7cL3m1HVxmS7hKJOlc9LYz9R KLqOduBuhLHUSQnnzU52LiQ9Y7Au5eUtmwFG2SF+u4DIN+lyKfD45Y4PR2Um/hPe 7QdR2GOX75YMgAWjq3kafodrhJFNzxSoFuexjRQeGC4bTn1JLGK6Uu/GER8VSnS+ FHoet85W5rg12NtJjC3qlfxahfmBYThsi00RB4EuNZxto3is0o8NwW6bnNjKSjHh wRjXUOr/ANnCYXjPhffjIvZERMyma/7GrjG20cxDmj/iJnucI1oprfzFHx7RAOKo gcBFVJiPLYzXZJkyU6nA4bjvWk/KYM/a5nzzaBsU9KObmgqL7H7SJTh8nKrTNhDr M4AkC3DIVlDaNoSloFRtyLXylrHT5KBBBBklWGFZiauicaLPbC8asElFVZjsRyTL 45ZLTmyFXHtVn6HIdY8JOtGR+iKay/EHFN62TKH8AMmCQY+JKlaQdDXFbqzZ29Gz n5pjA8/+9ddP9P+f8xC8P29mk7HkvujJH3gOepU78b8GIIzBXMKc7NZVHhPVv7f4 N6u7i5uxgxc= =Z9Ws -----END PGP SIGNATURE----- Merge tag 'md-3.10' of git://neil.brown.name/md Pull md fixes from NeilBrown: "A mixed bag of little fixes. No real new functionality here. Several patches are tagged for -stable." * tag 'md-3.10' of git://neil.brown.name/md: MD: ignore discard request for hard disks of hybid raid1/raid10 array md: bad block list should default to disabled. md: raid1/raid10 md devices leak memory when stopping DM RAID: Add message/status support for changing sync action MD: Export 'md_reap_sync_thread' function md: don't update metadata when stopping a read-only array. md: Allow devices to be re-added to a read-only array. md/raid10: Allow skipping recovery when clean arrays are assembled MD: Fix typos in MD documentation md/raid5: avoid an extra write when writing to a known-bad-block. md/raid5: Change or of some order to improve efficiency. md: use set_bit_le and clear_bit_le md: HOT_DISK_REMOVE shouldn't make a read-auto device active. md: use common code for all calls to ->hot_remove_disk() md: never update metadata when array is read-only.
This commit is contained in:
commit
f1e9a236e5
9 changed files with 368 additions and 131 deletions
|
@ -1,10 +1,13 @@
|
||||||
dm-raid
|
dm-raid
|
||||||
-------
|
=======
|
||||||
|
|
||||||
The device-mapper RAID (dm-raid) target provides a bridge from DM to MD.
|
The device-mapper RAID (dm-raid) target provides a bridge from DM to MD.
|
||||||
It allows the MD RAID drivers to be accessed using a device-mapper
|
It allows the MD RAID drivers to be accessed using a device-mapper
|
||||||
interface.
|
interface.
|
||||||
|
|
||||||
|
|
||||||
|
Mapping Table Interface
|
||||||
|
-----------------------
|
||||||
The target is named "raid" and it accepts the following parameters:
|
The target is named "raid" and it accepts the following parameters:
|
||||||
|
|
||||||
<raid_type> <#raid_params> <raid_params> \
|
<raid_type> <#raid_params> <raid_params> \
|
||||||
|
@ -47,7 +50,7 @@ The target is named "raid" and it accepts the following parameters:
|
||||||
followed by optional parameters (in any order):
|
followed by optional parameters (in any order):
|
||||||
[sync|nosync] Force or prevent RAID initialization.
|
[sync|nosync] Force or prevent RAID initialization.
|
||||||
|
|
||||||
[rebuild <idx>] Rebuild drive number idx (first drive is 0).
|
[rebuild <idx>] Rebuild drive number 'idx' (first drive is 0).
|
||||||
|
|
||||||
[daemon_sleep <ms>]
|
[daemon_sleep <ms>]
|
||||||
Interval between runs of the bitmap daemon that
|
Interval between runs of the bitmap daemon that
|
||||||
|
@ -56,9 +59,9 @@ The target is named "raid" and it accepts the following parameters:
|
||||||
|
|
||||||
[min_recovery_rate <kB/sec/disk>] Throttle RAID initialization
|
[min_recovery_rate <kB/sec/disk>] Throttle RAID initialization
|
||||||
[max_recovery_rate <kB/sec/disk>] Throttle RAID initialization
|
[max_recovery_rate <kB/sec/disk>] Throttle RAID initialization
|
||||||
[write_mostly <idx>] Drive index is write-mostly
|
[write_mostly <idx>] Mark drive index 'idx' write-mostly.
|
||||||
[max_write_behind <sectors>] See '-write-behind=' (man mdadm)
|
[max_write_behind <sectors>] See '--write-behind=' (man mdadm)
|
||||||
[stripe_cache <sectors>] Stripe cache size (higher RAIDs only)
|
[stripe_cache <sectors>] Stripe cache size (RAID 4/5/6 only)
|
||||||
[region_size <sectors>]
|
[region_size <sectors>]
|
||||||
The region_size multiplied by the number of regions is the
|
The region_size multiplied by the number of regions is the
|
||||||
logical size of the array. The bitmap records the device
|
logical size of the array. The bitmap records the device
|
||||||
|
@ -122,7 +125,7 @@ The target is named "raid" and it accepts the following parameters:
|
||||||
given for both the metadata and data drives for a given position.
|
given for both the metadata and data drives for a given position.
|
||||||
|
|
||||||
|
|
||||||
Example tables
|
Example Tables
|
||||||
--------------
|
--------------
|
||||||
# RAID4 - 4 data drives, 1 parity (no metadata devices)
|
# RAID4 - 4 data drives, 1 parity (no metadata devices)
|
||||||
# No metadata devices specified to hold superblock/bitmap info
|
# No metadata devices specified to hold superblock/bitmap info
|
||||||
|
@ -141,26 +144,70 @@ Example tables
|
||||||
raid4 4 2048 sync min_recovery_rate 20 \
|
raid4 4 2048 sync min_recovery_rate 20 \
|
||||||
5 8:17 8:18 8:33 8:34 8:49 8:50 8:65 8:66 8:81 8:82
|
5 8:17 8:18 8:33 8:34 8:49 8:50 8:65 8:66 8:81 8:82
|
||||||
|
|
||||||
|
|
||||||
|
Status Output
|
||||||
|
-------------
|
||||||
'dmsetup table' displays the table used to construct the mapping.
|
'dmsetup table' displays the table used to construct the mapping.
|
||||||
The optional parameters are always printed in the order listed
|
The optional parameters are always printed in the order listed
|
||||||
above with "sync" or "nosync" always output ahead of the other
|
above with "sync" or "nosync" always output ahead of the other
|
||||||
arguments, regardless of the order used when originally loading the table.
|
arguments, regardless of the order used when originally loading the table.
|
||||||
Arguments that can be repeated are ordered by value.
|
Arguments that can be repeated are ordered by value.
|
||||||
|
|
||||||
'dmsetup status' yields information on the state and health of the
|
|
||||||
array.
|
'dmsetup status' yields information on the state and health of the array.
|
||||||
The output is as follows:
|
The output is as follows (normally a single line, but expanded here for
|
||||||
|
clarity):
|
||||||
1: <s> <l> raid \
|
1: <s> <l> raid \
|
||||||
2: <raid_type> <#devices> <1 health char for each dev> <resync_ratio>
|
2: <raid_type> <#devices> <health_chars> \
|
||||||
|
3: <sync_ratio> <sync_action> <mismatch_cnt>
|
||||||
|
|
||||||
Line 1 is the standard output produced by device-mapper.
|
Line 1 is the standard output produced by device-mapper.
|
||||||
Line 2 is produced by the raid target, and best explained by example:
|
Lines 2 & 3 are produced by the raid target and are best explained by example:
|
||||||
0 1960893648 raid raid4 5 AAAAA 2/490221568
|
0 1960893648 raid raid4 5 AAAAA 2/490221568 init 0
|
||||||
Here we can see the RAID type is raid4, there are 5 devices - all of
|
Here we can see the RAID type is raid4, there are 5 devices - all of
|
||||||
which are 'A'live, and the array is 2/490221568 complete with recovery.
|
which are 'A'live, and the array is 2/490221568 complete with its initial
|
||||||
Faulty or missing devices are marked 'D'. Devices that are out-of-sync
|
recovery. Here is a fuller description of the individual fields:
|
||||||
are marked 'a'.
|
<raid_type> Same as the <raid_type> used to create the array.
|
||||||
|
<health_chars> One char for each device, indicating: 'A' = alive and
|
||||||
|
in-sync, 'a' = alive but not in-sync, 'D' = dead/failed.
|
||||||
|
<sync_ratio> The ratio indicating how much of the array has undergone
|
||||||
|
the process described by 'sync_action'. If the
|
||||||
|
'sync_action' is "check" or "repair", then the process
|
||||||
|
of "resync" or "recover" can be considered complete.
|
||||||
|
<sync_action> One of the following possible states:
|
||||||
|
idle - No synchronization action is being performed.
|
||||||
|
frozen - The current action has been halted.
|
||||||
|
resync - Array is undergoing its initial synchronization
|
||||||
|
or is resynchronizing after an unclean shutdown
|
||||||
|
(possibly aided by a bitmap).
|
||||||
|
recover - A device in the array is being rebuilt or
|
||||||
|
replaced.
|
||||||
|
check - A user-initiated full check of the array is
|
||||||
|
being performed. All blocks are read and
|
||||||
|
checked for consistency. The number of
|
||||||
|
discrepancies found is recorded in
|
||||||
|
<mismatch_cnt>. No changes are made to the
|
||||||
|
array by this action.
|
||||||
|
repair - The same as "check", but discrepancies are
|
||||||
|
corrected.
|
||||||
|
reshape - The array is undergoing a reshape.
|
||||||
|
<mismatch_cnt> The number of discrepancies found between mirror copies
|
||||||
|
in RAID1/10 or wrong parity values found in RAID4/5/6.
|
||||||
|
This value is valid only after a "check" of the array
|
||||||
|
is performed. A healthy array has a 'mismatch_cnt' of 0.
|
||||||
|
|
||||||
|
Message Interface
|
||||||
|
-----------------
|
||||||
|
The dm-raid target will accept certain actions through the 'message' interface.
|
||||||
|
('man dmsetup' for more information on the message interface.) These actions
|
||||||
|
include:
|
||||||
|
"idle" - Halt the current sync action.
|
||||||
|
"frozen" - Freeze the current sync action.
|
||||||
|
"resync" - Initiate/continue a resync.
|
||||||
|
"recover"- Initiate/continue a recover process.
|
||||||
|
"check" - Initiate a check (i.e. a "scrub") of the array.
|
||||||
|
"repair" - Initiate a repair of the array.
|
||||||
|
"reshape"- Currently unsupported (-EINVAL).
|
||||||
|
|
||||||
Version History
|
Version History
|
||||||
---------------
|
---------------
|
||||||
|
@ -171,4 +218,7 @@ Version History
|
||||||
1.3.1 Allow device replacement/rebuild for RAID 10
|
1.3.1 Allow device replacement/rebuild for RAID 10
|
||||||
1.3.2 Fix/improve redundancy checking for RAID10
|
1.3.2 Fix/improve redundancy checking for RAID10
|
||||||
1.4.0 Non-functional change. Removes arg from mapping function.
|
1.4.0 Non-functional change. Removes arg from mapping function.
|
||||||
1.4.1 Add RAID10 "far" and "offset" algorithm support.
|
1.4.1 RAID10 fix redundancy validation checks (commit 55ebbb5).
|
||||||
|
1.4.2 Add RAID10 "far" and "offset" algorithm support.
|
||||||
|
1.5.0 Add message interface to allow manipulation of the sync_action.
|
||||||
|
New status (STATUSTYPE_INFO) fields: sync_action and mismatch_cnt.
|
||||||
|
|
|
@ -119,7 +119,7 @@ device to add.
|
||||||
The array is started with the RUN_ARRAY ioctl.
|
The array is started with the RUN_ARRAY ioctl.
|
||||||
|
|
||||||
Once started, new devices can be added. They should have an
|
Once started, new devices can be added. They should have an
|
||||||
appropriate superblock written to them, and then passed be in with
|
appropriate superblock written to them, and then be passed in with
|
||||||
ADD_NEW_DISK.
|
ADD_NEW_DISK.
|
||||||
|
|
||||||
Devices that have failed or are not yet active can be detached from an
|
Devices that have failed or are not yet active can be detached from an
|
||||||
|
@ -131,7 +131,7 @@ Specific Rules that apply to format-0 super block arrays, and
|
||||||
-------------------------------------------------------------
|
-------------------------------------------------------------
|
||||||
|
|
||||||
An array can be 'created' by describing the array (level, chunksize
|
An array can be 'created' by describing the array (level, chunksize
|
||||||
etc) in a SET_ARRAY_INFO ioctl. This must has major_version==0 and
|
etc) in a SET_ARRAY_INFO ioctl. This must have major_version==0 and
|
||||||
raid_disks != 0.
|
raid_disks != 0.
|
||||||
|
|
||||||
Then uninitialized devices can be added with ADD_NEW_DISK. The
|
Then uninitialized devices can be added with ADD_NEW_DISK. The
|
||||||
|
@ -426,7 +426,7 @@ Each directory contains:
|
||||||
offset
|
offset
|
||||||
This gives the location in the device (in sectors from the
|
This gives the location in the device (in sectors from the
|
||||||
start) where data from the array will be stored. Any part of
|
start) where data from the array will be stored. Any part of
|
||||||
the device before this offset us not touched, unless it is
|
the device before this offset is not touched, unless it is
|
||||||
used for storing metadata (Formats 1.1 and 1.2).
|
used for storing metadata (Formats 1.1 and 1.2).
|
||||||
|
|
||||||
size
|
size
|
||||||
|
@ -440,7 +440,7 @@ Each directory contains:
|
||||||
When the device is not 'in_sync', this records the number of
|
When the device is not 'in_sync', this records the number of
|
||||||
sectors from the start of the device which are known to be
|
sectors from the start of the device which are known to be
|
||||||
correct. This is normally zero, but during a recovery
|
correct. This is normally zero, but during a recovery
|
||||||
operation is will steadily increase, and if the recovery is
|
operation it will steadily increase, and if the recovery is
|
||||||
interrupted, restoring this value can cause recovery to
|
interrupted, restoring this value can cause recovery to
|
||||||
avoid repeating the earlier blocks. With v1.x metadata, this
|
avoid repeating the earlier blocks. With v1.x metadata, this
|
||||||
value is saved and restored automatically.
|
value is saved and restored automatically.
|
||||||
|
@ -468,7 +468,7 @@ Each directory contains:
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
An active md device will also contain and entry for each active device
|
An active md device will also contain an entry for each active device
|
||||||
in the array. These are named
|
in the array. These are named
|
||||||
|
|
||||||
rdNN
|
rdNN
|
||||||
|
@ -482,7 +482,7 @@ will show 'in_sync' on every line.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Active md devices for levels that support data redundancy (1,4,5,6)
|
Active md devices for levels that support data redundancy (1,4,5,6,10)
|
||||||
also have
|
also have
|
||||||
|
|
||||||
sync_action
|
sync_action
|
||||||
|
@ -494,7 +494,7 @@ also have
|
||||||
failed/missing device
|
failed/missing device
|
||||||
idle - nothing is happening
|
idle - nothing is happening
|
||||||
check - A full check of redundancy was requested and is
|
check - A full check of redundancy was requested and is
|
||||||
happening. This reads all block and checks
|
happening. This reads all blocks and checks
|
||||||
them. A repair may also happen for some raid
|
them. A repair may also happen for some raid
|
||||||
levels.
|
levels.
|
||||||
repair - A full check and repair is happening. This is
|
repair - A full check and repair is happening. This is
|
||||||
|
@ -522,7 +522,7 @@ also have
|
||||||
|
|
||||||
degraded
|
degraded
|
||||||
This contains a count of the number of devices by which the
|
This contains a count of the number of devices by which the
|
||||||
arrays is degraded. So an optimal array with show '0'. A
|
array is degraded. So an optimal array will show '0'. A
|
||||||
single failed/missing drive will show '1', etc.
|
single failed/missing drive will show '1', etc.
|
||||||
This file responds to select/poll, any increase or decrease
|
This file responds to select/poll, any increase or decrease
|
||||||
in the count of missing devices will trigger an event.
|
in the count of missing devices will trigger an event.
|
||||||
|
|
|
@ -846,7 +846,7 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
|
||||||
if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
|
if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
|
||||||
set_bit(bit, kaddr);
|
set_bit(bit, kaddr);
|
||||||
else
|
else
|
||||||
test_and_set_bit_le(bit, kaddr);
|
set_bit_le(bit, kaddr);
|
||||||
kunmap_atomic(kaddr);
|
kunmap_atomic(kaddr);
|
||||||
pr_debug("set file bit %lu page %lu\n", bit, page->index);
|
pr_debug("set file bit %lu page %lu\n", bit, page->index);
|
||||||
/* record page number so it gets flushed to disk when unplug occurs */
|
/* record page number so it gets flushed to disk when unplug occurs */
|
||||||
|
@ -868,7 +868,7 @@ static void bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
|
||||||
if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
|
if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
|
||||||
clear_bit(bit, paddr);
|
clear_bit(bit, paddr);
|
||||||
else
|
else
|
||||||
test_and_clear_bit_le(bit, paddr);
|
clear_bit_le(bit, paddr);
|
||||||
kunmap_atomic(paddr);
|
kunmap_atomic(paddr);
|
||||||
if (!test_page_attr(bitmap, page->index, BITMAP_PAGE_NEEDWRITE)) {
|
if (!test_page_attr(bitmap, page->index, BITMAP_PAGE_NEEDWRITE)) {
|
||||||
set_page_attr(bitmap, page->index, BITMAP_PAGE_PENDING);
|
set_page_attr(bitmap, page->index, BITMAP_PAGE_PENDING);
|
||||||
|
|
|
@ -1279,6 +1279,31 @@ static int raid_map(struct dm_target *ti, struct bio *bio)
|
||||||
return DM_MAPIO_SUBMITTED;
|
return DM_MAPIO_SUBMITTED;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static const char *decipher_sync_action(struct mddev *mddev)
|
||||||
|
{
|
||||||
|
if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
|
||||||
|
return "frozen";
|
||||||
|
|
||||||
|
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
|
||||||
|
(!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))) {
|
||||||
|
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
|
||||||
|
return "reshape";
|
||||||
|
|
||||||
|
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
|
||||||
|
if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
|
||||||
|
return "resync";
|
||||||
|
else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
|
||||||
|
return "check";
|
||||||
|
return "repair";
|
||||||
|
}
|
||||||
|
|
||||||
|
if (test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
|
||||||
|
return "recover";
|
||||||
|
}
|
||||||
|
|
||||||
|
return "idle";
|
||||||
|
}
|
||||||
|
|
||||||
static void raid_status(struct dm_target *ti, status_type_t type,
|
static void raid_status(struct dm_target *ti, status_type_t type,
|
||||||
unsigned status_flags, char *result, unsigned maxlen)
|
unsigned status_flags, char *result, unsigned maxlen)
|
||||||
{
|
{
|
||||||
|
@ -1298,8 +1323,18 @@ static void raid_status(struct dm_target *ti, status_type_t type,
|
||||||
sync = rs->md.recovery_cp;
|
sync = rs->md.recovery_cp;
|
||||||
|
|
||||||
if (sync >= rs->md.resync_max_sectors) {
|
if (sync >= rs->md.resync_max_sectors) {
|
||||||
|
/*
|
||||||
|
* Sync complete.
|
||||||
|
*/
|
||||||
array_in_sync = 1;
|
array_in_sync = 1;
|
||||||
sync = rs->md.resync_max_sectors;
|
sync = rs->md.resync_max_sectors;
|
||||||
|
} else if (test_bit(MD_RECOVERY_REQUESTED, &rs->md.recovery)) {
|
||||||
|
/*
|
||||||
|
* If "check" or "repair" is occurring, the array has
|
||||||
|
* undergone an initial sync and the health characters
|
||||||
|
* should not be 'a' anymore.
|
||||||
|
*/
|
||||||
|
array_in_sync = 1;
|
||||||
} else {
|
} else {
|
||||||
/*
|
/*
|
||||||
* The array may be doing an initial sync, or it may
|
* The array may be doing an initial sync, or it may
|
||||||
|
@ -1311,6 +1346,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
|
||||||
if (!test_bit(In_sync, &rs->dev[i].rdev.flags))
|
if (!test_bit(In_sync, &rs->dev[i].rdev.flags))
|
||||||
array_in_sync = 1;
|
array_in_sync = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Status characters:
|
* Status characters:
|
||||||
* 'D' = Dead/Failed device
|
* 'D' = Dead/Failed device
|
||||||
|
@ -1339,6 +1375,21 @@ static void raid_status(struct dm_target *ti, status_type_t type,
|
||||||
(unsigned long long) sync,
|
(unsigned long long) sync,
|
||||||
(unsigned long long) rs->md.resync_max_sectors);
|
(unsigned long long) rs->md.resync_max_sectors);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Sync action:
|
||||||
|
* See Documentation/device-mapper/dm-raid.txt for
|
||||||
|
* information on each of these states.
|
||||||
|
*/
|
||||||
|
DMEMIT(" %s", decipher_sync_action(&rs->md));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* resync_mismatches/mismatch_cnt
|
||||||
|
* This field shows the number of discrepancies found when
|
||||||
|
* performing a "check" of the array.
|
||||||
|
*/
|
||||||
|
DMEMIT(" %llu",
|
||||||
|
(unsigned long long)
|
||||||
|
atomic64_read(&rs->md.resync_mismatches));
|
||||||
break;
|
break;
|
||||||
case STATUSTYPE_TABLE:
|
case STATUSTYPE_TABLE:
|
||||||
/* The string you would use to construct this array */
|
/* The string you would use to construct this array */
|
||||||
|
@ -1425,7 +1476,62 @@ static void raid_status(struct dm_target *ti, status_type_t type,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int raid_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data)
|
static int raid_message(struct dm_target *ti, unsigned argc, char **argv)
|
||||||
|
{
|
||||||
|
struct raid_set *rs = ti->private;
|
||||||
|
struct mddev *mddev = &rs->md;
|
||||||
|
|
||||||
|
if (!strcasecmp(argv[0], "reshape")) {
|
||||||
|
DMERR("Reshape not supported.");
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!mddev->pers || !mddev->pers->sync_request)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
if (!strcasecmp(argv[0], "frozen"))
|
||||||
|
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||||
|
else
|
||||||
|
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||||
|
|
||||||
|
if (!strcasecmp(argv[0], "idle") || !strcasecmp(argv[0], "frozen")) {
|
||||||
|
if (mddev->sync_thread) {
|
||||||
|
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||||
|
md_reap_sync_thread(mddev);
|
||||||
|
}
|
||||||
|
} else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
|
||||||
|
test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
|
||||||
|
return -EBUSY;
|
||||||
|
else if (!strcasecmp(argv[0], "resync"))
|
||||||
|
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||||
|
else if (!strcasecmp(argv[0], "recover")) {
|
||||||
|
set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
|
||||||
|
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||||
|
} else {
|
||||||
|
if (!strcasecmp(argv[0], "check"))
|
||||||
|
set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
|
||||||
|
else if (!!strcasecmp(argv[0], "repair"))
|
||||||
|
return -EINVAL;
|
||||||
|
set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
|
||||||
|
set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
|
||||||
|
}
|
||||||
|
if (mddev->ro == 2) {
|
||||||
|
/* A write to sync_action is enough to justify
|
||||||
|
* canceling read-auto mode
|
||||||
|
*/
|
||||||
|
mddev->ro = 0;
|
||||||
|
if (!mddev->suspended)
|
||||||
|
md_wakeup_thread(mddev->sync_thread);
|
||||||
|
}
|
||||||
|
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||||
|
if (!mddev->suspended)
|
||||||
|
md_wakeup_thread(mddev->thread);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int raid_iterate_devices(struct dm_target *ti,
|
||||||
|
iterate_devices_callout_fn fn, void *data)
|
||||||
{
|
{
|
||||||
struct raid_set *rs = ti->private;
|
struct raid_set *rs = ti->private;
|
||||||
unsigned i;
|
unsigned i;
|
||||||
|
@ -1482,12 +1588,13 @@ static void raid_resume(struct dm_target *ti)
|
||||||
|
|
||||||
static struct target_type raid_target = {
|
static struct target_type raid_target = {
|
||||||
.name = "raid",
|
.name = "raid",
|
||||||
.version = {1, 4, 2},
|
.version = {1, 5, 0},
|
||||||
.module = THIS_MODULE,
|
.module = THIS_MODULE,
|
||||||
.ctr = raid_ctr,
|
.ctr = raid_ctr,
|
||||||
.dtr = raid_dtr,
|
.dtr = raid_dtr,
|
||||||
.map = raid_map,
|
.map = raid_map,
|
||||||
.status = raid_status,
|
.status = raid_status,
|
||||||
|
.message = raid_message,
|
||||||
.iterate_devices = raid_iterate_devices,
|
.iterate_devices = raid_iterate_devices,
|
||||||
.io_hints = raid_io_hints,
|
.io_hints = raid_io_hints,
|
||||||
.presuspend = raid_presuspend,
|
.presuspend = raid_presuspend,
|
||||||
|
|
219
drivers/md/md.c
219
drivers/md/md.c
|
@ -72,6 +72,9 @@ static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
|
||||||
static struct workqueue_struct *md_wq;
|
static struct workqueue_struct *md_wq;
|
||||||
static struct workqueue_struct *md_misc_wq;
|
static struct workqueue_struct *md_misc_wq;
|
||||||
|
|
||||||
|
static int remove_and_add_spares(struct mddev *mddev,
|
||||||
|
struct md_rdev *this);
|
||||||
|
|
||||||
#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
|
#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1564,8 +1567,8 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
|
||||||
sector, count, 1) == 0)
|
sector, count, 1) == 0)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
} else if (sb->bblog_offset == 0)
|
} else if (sb->bblog_offset != 0)
|
||||||
rdev->badblocks.shift = -1;
|
rdev->badblocks.shift = 0;
|
||||||
|
|
||||||
if (!refdev) {
|
if (!refdev) {
|
||||||
ret = 1;
|
ret = 1;
|
||||||
|
@ -2411,6 +2414,11 @@ static void md_update_sb(struct mddev * mddev, int force_change)
|
||||||
int nospares = 0;
|
int nospares = 0;
|
||||||
int any_badblocks_changed = 0;
|
int any_badblocks_changed = 0;
|
||||||
|
|
||||||
|
if (mddev->ro) {
|
||||||
|
if (force_change)
|
||||||
|
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
||||||
|
return;
|
||||||
|
}
|
||||||
repeat:
|
repeat:
|
||||||
/* First make sure individual recovery_offsets are correct */
|
/* First make sure individual recovery_offsets are correct */
|
||||||
rdev_for_each(rdev, mddev) {
|
rdev_for_each(rdev, mddev) {
|
||||||
|
@ -2800,12 +2808,10 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len)
|
||||||
/* personality does all needed checks */
|
/* personality does all needed checks */
|
||||||
if (rdev->mddev->pers->hot_remove_disk == NULL)
|
if (rdev->mddev->pers->hot_remove_disk == NULL)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
err = rdev->mddev->pers->
|
clear_bit(Blocked, &rdev->flags);
|
||||||
hot_remove_disk(rdev->mddev, rdev);
|
remove_and_add_spares(rdev->mddev, rdev);
|
||||||
if (err)
|
if (rdev->raid_disk >= 0)
|
||||||
return err;
|
return -EBUSY;
|
||||||
sysfs_unlink_rdev(rdev->mddev, rdev);
|
|
||||||
rdev->raid_disk = -1;
|
|
||||||
set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
|
set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
|
||||||
md_wakeup_thread(rdev->mddev->thread);
|
md_wakeup_thread(rdev->mddev->thread);
|
||||||
} else if (rdev->mddev->pers) {
|
} else if (rdev->mddev->pers) {
|
||||||
|
@ -3221,7 +3227,7 @@ int md_rdev_init(struct md_rdev *rdev)
|
||||||
* be used - I wonder if that matters
|
* be used - I wonder if that matters
|
||||||
*/
|
*/
|
||||||
rdev->badblocks.count = 0;
|
rdev->badblocks.count = 0;
|
||||||
rdev->badblocks.shift = 0;
|
rdev->badblocks.shift = -1; /* disabled until explicitly enabled */
|
||||||
rdev->badblocks.page = kmalloc(PAGE_SIZE, GFP_KERNEL);
|
rdev->badblocks.page = kmalloc(PAGE_SIZE, GFP_KERNEL);
|
||||||
seqlock_init(&rdev->badblocks.lock);
|
seqlock_init(&rdev->badblocks.lock);
|
||||||
if (rdev->badblocks.page == NULL)
|
if (rdev->badblocks.page == NULL)
|
||||||
|
@ -3293,9 +3299,6 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe
|
||||||
goto abort_free;
|
goto abort_free;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (super_format == -1)
|
|
||||||
/* hot-add for 0.90, or non-persistent: so no badblocks */
|
|
||||||
rdev->badblocks.shift = -1;
|
|
||||||
|
|
||||||
return rdev;
|
return rdev;
|
||||||
|
|
||||||
|
@ -4225,8 +4228,6 @@ action_show(struct mddev *mddev, char *page)
|
||||||
return sprintf(page, "%s\n", type);
|
return sprintf(page, "%s\n", type);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void reap_sync_thread(struct mddev *mddev);
|
|
||||||
|
|
||||||
static ssize_t
|
static ssize_t
|
||||||
action_store(struct mddev *mddev, const char *page, size_t len)
|
action_store(struct mddev *mddev, const char *page, size_t len)
|
||||||
{
|
{
|
||||||
|
@ -4241,7 +4242,7 @@ action_store(struct mddev *mddev, const char *page, size_t len)
|
||||||
if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
|
if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
|
||||||
if (mddev->sync_thread) {
|
if (mddev->sync_thread) {
|
||||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||||
reap_sync_thread(mddev);
|
md_reap_sync_thread(mddev);
|
||||||
}
|
}
|
||||||
} else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
|
} else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
|
||||||
test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
|
test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
|
||||||
|
@ -5279,7 +5280,7 @@ static void __md_stop_writes(struct mddev *mddev)
|
||||||
if (mddev->sync_thread) {
|
if (mddev->sync_thread) {
|
||||||
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||||
reap_sync_thread(mddev);
|
md_reap_sync_thread(mddev);
|
||||||
}
|
}
|
||||||
|
|
||||||
del_timer_sync(&mddev->safemode_timer);
|
del_timer_sync(&mddev->safemode_timer);
|
||||||
|
@ -5287,7 +5288,8 @@ static void __md_stop_writes(struct mddev *mddev)
|
||||||
bitmap_flush(mddev);
|
bitmap_flush(mddev);
|
||||||
md_super_wait(mddev);
|
md_super_wait(mddev);
|
||||||
|
|
||||||
if (!mddev->in_sync || mddev->flags) {
|
if (mddev->ro == 0 &&
|
||||||
|
(!mddev->in_sync || mddev->flags)) {
|
||||||
/* mark array as shutdown cleanly */
|
/* mark array as shutdown cleanly */
|
||||||
mddev->in_sync = 1;
|
mddev->in_sync = 1;
|
||||||
md_update_sb(mddev, 1);
|
md_update_sb(mddev, 1);
|
||||||
|
@ -5810,7 +5812,7 @@ static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info)
|
||||||
else
|
else
|
||||||
sysfs_notify_dirent_safe(rdev->sysfs_state);
|
sysfs_notify_dirent_safe(rdev->sysfs_state);
|
||||||
|
|
||||||
md_update_sb(mddev, 1);
|
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
||||||
if (mddev->degraded)
|
if (mddev->degraded)
|
||||||
set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
|
set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
|
||||||
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||||
|
@ -5877,6 +5879,9 @@ static int hot_remove_disk(struct mddev * mddev, dev_t dev)
|
||||||
if (!rdev)
|
if (!rdev)
|
||||||
return -ENXIO;
|
return -ENXIO;
|
||||||
|
|
||||||
|
clear_bit(Blocked, &rdev->flags);
|
||||||
|
remove_and_add_spares(mddev, rdev);
|
||||||
|
|
||||||
if (rdev->raid_disk >= 0)
|
if (rdev->raid_disk >= 0)
|
||||||
goto busy;
|
goto busy;
|
||||||
|
|
||||||
|
@ -6490,6 +6495,28 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
|
||||||
err = md_set_readonly(mddev, bdev);
|
err = md_set_readonly(mddev, bdev);
|
||||||
goto done_unlock;
|
goto done_unlock;
|
||||||
|
|
||||||
|
case HOT_REMOVE_DISK:
|
||||||
|
err = hot_remove_disk(mddev, new_decode_dev(arg));
|
||||||
|
goto done_unlock;
|
||||||
|
|
||||||
|
case ADD_NEW_DISK:
|
||||||
|
/* We can support ADD_NEW_DISK on read-only arrays
|
||||||
|
* only if we are re-adding a preexisting device.
|
||||||
|
* So require mddev->pers and MD_DISK_SYNC.
|
||||||
|
*/
|
||||||
|
if (mddev->pers) {
|
||||||
|
mdu_disk_info_t info;
|
||||||
|
if (copy_from_user(&info, argp, sizeof(info)))
|
||||||
|
err = -EFAULT;
|
||||||
|
else if (!(info.state & (1<<MD_DISK_SYNC)))
|
||||||
|
/* Need to clear read-only for this */
|
||||||
|
break;
|
||||||
|
else
|
||||||
|
err = add_new_disk(mddev, &info);
|
||||||
|
goto done_unlock;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
case BLKROSET:
|
case BLKROSET:
|
||||||
if (get_user(ro, (int __user *)(arg))) {
|
if (get_user(ro, (int __user *)(arg))) {
|
||||||
err = -EFAULT;
|
err = -EFAULT;
|
||||||
|
@ -6560,10 +6587,6 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
|
||||||
goto done_unlock;
|
goto done_unlock;
|
||||||
}
|
}
|
||||||
|
|
||||||
case HOT_REMOVE_DISK:
|
|
||||||
err = hot_remove_disk(mddev, new_decode_dev(arg));
|
|
||||||
goto done_unlock;
|
|
||||||
|
|
||||||
case HOT_ADD_DISK:
|
case HOT_ADD_DISK:
|
||||||
err = hot_add_disk(mddev, new_decode_dev(arg));
|
err = hot_add_disk(mddev, new_decode_dev(arg));
|
||||||
goto done_unlock;
|
goto done_unlock;
|
||||||
|
@ -7644,14 +7667,16 @@ void md_do_sync(struct md_thread *thread)
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(md_do_sync);
|
EXPORT_SYMBOL_GPL(md_do_sync);
|
||||||
|
|
||||||
static int remove_and_add_spares(struct mddev *mddev)
|
static int remove_and_add_spares(struct mddev *mddev,
|
||||||
|
struct md_rdev *this)
|
||||||
{
|
{
|
||||||
struct md_rdev *rdev;
|
struct md_rdev *rdev;
|
||||||
int spares = 0;
|
int spares = 0;
|
||||||
int removed = 0;
|
int removed = 0;
|
||||||
|
|
||||||
rdev_for_each(rdev, mddev)
|
rdev_for_each(rdev, mddev)
|
||||||
if (rdev->raid_disk >= 0 &&
|
if ((this == NULL || rdev == this) &&
|
||||||
|
rdev->raid_disk >= 0 &&
|
||||||
!test_bit(Blocked, &rdev->flags) &&
|
!test_bit(Blocked, &rdev->flags) &&
|
||||||
(test_bit(Faulty, &rdev->flags) ||
|
(test_bit(Faulty, &rdev->flags) ||
|
||||||
! test_bit(In_sync, &rdev->flags)) &&
|
! test_bit(In_sync, &rdev->flags)) &&
|
||||||
|
@ -7666,14 +7691,37 @@ static int remove_and_add_spares(struct mddev *mddev)
|
||||||
if (removed && mddev->kobj.sd)
|
if (removed && mddev->kobj.sd)
|
||||||
sysfs_notify(&mddev->kobj, NULL, "degraded");
|
sysfs_notify(&mddev->kobj, NULL, "degraded");
|
||||||
|
|
||||||
|
if (this)
|
||||||
|
goto no_add;
|
||||||
|
|
||||||
rdev_for_each(rdev, mddev) {
|
rdev_for_each(rdev, mddev) {
|
||||||
if (rdev->raid_disk >= 0 &&
|
if (rdev->raid_disk >= 0 &&
|
||||||
!test_bit(In_sync, &rdev->flags) &&
|
!test_bit(In_sync, &rdev->flags) &&
|
||||||
!test_bit(Faulty, &rdev->flags))
|
!test_bit(Faulty, &rdev->flags))
|
||||||
spares++;
|
spares++;
|
||||||
if (rdev->raid_disk < 0
|
if (rdev->raid_disk >= 0)
|
||||||
&& !test_bit(Faulty, &rdev->flags)) {
|
continue;
|
||||||
|
if (test_bit(Faulty, &rdev->flags))
|
||||||
|
continue;
|
||||||
|
if (mddev->ro &&
|
||||||
|
rdev->saved_raid_disk < 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
rdev->recovery_offset = 0;
|
rdev->recovery_offset = 0;
|
||||||
|
if (rdev->saved_raid_disk >= 0 && mddev->in_sync) {
|
||||||
|
spin_lock_irq(&mddev->write_lock);
|
||||||
|
if (mddev->in_sync)
|
||||||
|
/* OK, this device, which is in_sync,
|
||||||
|
* will definitely be noticed before
|
||||||
|
* the next write, so recovery isn't
|
||||||
|
* needed.
|
||||||
|
*/
|
||||||
|
rdev->recovery_offset = mddev->recovery_cp;
|
||||||
|
spin_unlock_irq(&mddev->write_lock);
|
||||||
|
}
|
||||||
|
if (mddev->ro && rdev->recovery_offset != MaxSector)
|
||||||
|
/* not safe to add this disk now */
|
||||||
|
continue;
|
||||||
if (mddev->pers->
|
if (mddev->pers->
|
||||||
hot_add_disk(mddev, rdev) == 0) {
|
hot_add_disk(mddev, rdev) == 0) {
|
||||||
if (sysfs_link_rdev(mddev, rdev))
|
if (sysfs_link_rdev(mddev, rdev))
|
||||||
|
@ -7683,57 +7731,12 @@ static int remove_and_add_spares(struct mddev *mddev)
|
||||||
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
no_add:
|
||||||
if (removed)
|
if (removed)
|
||||||
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
||||||
return spares;
|
return spares;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void reap_sync_thread(struct mddev *mddev)
|
|
||||||
{
|
|
||||||
struct md_rdev *rdev;
|
|
||||||
|
|
||||||
/* resync has finished, collect result */
|
|
||||||
md_unregister_thread(&mddev->sync_thread);
|
|
||||||
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
|
|
||||||
!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
|
|
||||||
/* success...*/
|
|
||||||
/* activate any spares */
|
|
||||||
if (mddev->pers->spare_active(mddev)) {
|
|
||||||
sysfs_notify(&mddev->kobj, NULL,
|
|
||||||
"degraded");
|
|
||||||
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
|
|
||||||
mddev->pers->finish_reshape)
|
|
||||||
mddev->pers->finish_reshape(mddev);
|
|
||||||
|
|
||||||
/* If array is no-longer degraded, then any saved_raid_disk
|
|
||||||
* information must be scrapped. Also if any device is now
|
|
||||||
* In_sync we must scrape the saved_raid_disk for that device
|
|
||||||
* do the superblock for an incrementally recovered device
|
|
||||||
* written out.
|
|
||||||
*/
|
|
||||||
rdev_for_each(rdev, mddev)
|
|
||||||
if (!mddev->degraded ||
|
|
||||||
test_bit(In_sync, &rdev->flags))
|
|
||||||
rdev->saved_raid_disk = -1;
|
|
||||||
|
|
||||||
md_update_sb(mddev, 1);
|
|
||||||
clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
|
|
||||||
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
|
|
||||||
clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
|
|
||||||
clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
|
|
||||||
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
|
|
||||||
/* flag recovery needed just to double check */
|
|
||||||
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
|
||||||
sysfs_notify_dirent_safe(mddev->sysfs_action);
|
|
||||||
md_new_event(mddev);
|
|
||||||
if (mddev->event_work.func)
|
|
||||||
queue_work(md_misc_wq, &mddev->event_work);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This routine is regularly called by all per-raid-array threads to
|
* This routine is regularly called by all per-raid-array threads to
|
||||||
* deal with generic issues like resync and super-block update.
|
* deal with generic issues like resync and super-block update.
|
||||||
|
@ -7789,22 +7792,16 @@ void md_check_recovery(struct mddev *mddev)
|
||||||
int spares = 0;
|
int spares = 0;
|
||||||
|
|
||||||
if (mddev->ro) {
|
if (mddev->ro) {
|
||||||
/* Only thing we do on a ro array is remove
|
/* On a read-only array we can:
|
||||||
* failed devices.
|
* - remove failed devices
|
||||||
|
* - add already-in_sync devices if the array itself
|
||||||
|
* is in-sync.
|
||||||
|
* As we only add devices that are already in-sync,
|
||||||
|
* we can activate the spares immediately.
|
||||||
*/
|
*/
|
||||||
struct md_rdev *rdev;
|
|
||||||
rdev_for_each(rdev, mddev)
|
|
||||||
if (rdev->raid_disk >= 0 &&
|
|
||||||
!test_bit(Blocked, &rdev->flags) &&
|
|
||||||
test_bit(Faulty, &rdev->flags) &&
|
|
||||||
atomic_read(&rdev->nr_pending)==0) {
|
|
||||||
if (mddev->pers->hot_remove_disk(
|
|
||||||
mddev, rdev) == 0) {
|
|
||||||
sysfs_unlink_rdev(mddev, rdev);
|
|
||||||
rdev->raid_disk = -1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||||
|
remove_and_add_spares(mddev, NULL);
|
||||||
|
mddev->pers->spare_active(mddev);
|
||||||
goto unlock;
|
goto unlock;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -7836,7 +7833,7 @@ void md_check_recovery(struct mddev *mddev)
|
||||||
goto unlock;
|
goto unlock;
|
||||||
}
|
}
|
||||||
if (mddev->sync_thread) {
|
if (mddev->sync_thread) {
|
||||||
reap_sync_thread(mddev);
|
md_reap_sync_thread(mddev);
|
||||||
goto unlock;
|
goto unlock;
|
||||||
}
|
}
|
||||||
/* Set RUNNING before clearing NEEDED to avoid
|
/* Set RUNNING before clearing NEEDED to avoid
|
||||||
|
@ -7867,7 +7864,7 @@ void md_check_recovery(struct mddev *mddev)
|
||||||
goto unlock;
|
goto unlock;
|
||||||
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
|
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
|
||||||
clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
|
clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
|
||||||
} else if ((spares = remove_and_add_spares(mddev))) {
|
} else if ((spares = remove_and_add_spares(mddev, NULL))) {
|
||||||
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
|
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
|
||||||
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
|
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
|
||||||
clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
|
clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
|
||||||
|
@ -7917,6 +7914,51 @@ void md_check_recovery(struct mddev *mddev)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void md_reap_sync_thread(struct mddev *mddev)
|
||||||
|
{
|
||||||
|
struct md_rdev *rdev;
|
||||||
|
|
||||||
|
/* resync has finished, collect result */
|
||||||
|
md_unregister_thread(&mddev->sync_thread);
|
||||||
|
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
|
||||||
|
!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
|
||||||
|
/* success...*/
|
||||||
|
/* activate any spares */
|
||||||
|
if (mddev->pers->spare_active(mddev)) {
|
||||||
|
sysfs_notify(&mddev->kobj, NULL,
|
||||||
|
"degraded");
|
||||||
|
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
|
||||||
|
mddev->pers->finish_reshape)
|
||||||
|
mddev->pers->finish_reshape(mddev);
|
||||||
|
|
||||||
|
/* If array is no-longer degraded, then any saved_raid_disk
|
||||||
|
* information must be scrapped. Also if any device is now
|
||||||
|
* In_sync we must scrape the saved_raid_disk for that device
|
||||||
|
* do the superblock for an incrementally recovered device
|
||||||
|
* written out.
|
||||||
|
*/
|
||||||
|
rdev_for_each(rdev, mddev)
|
||||||
|
if (!mddev->degraded ||
|
||||||
|
test_bit(In_sync, &rdev->flags))
|
||||||
|
rdev->saved_raid_disk = -1;
|
||||||
|
|
||||||
|
md_update_sb(mddev, 1);
|
||||||
|
clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
|
||||||
|
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
|
||||||
|
clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
|
||||||
|
clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
|
||||||
|
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
|
||||||
|
/* flag recovery needed just to double check */
|
||||||
|
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||||
|
sysfs_notify_dirent_safe(mddev->sysfs_action);
|
||||||
|
md_new_event(mddev);
|
||||||
|
if (mddev->event_work.func)
|
||||||
|
queue_work(md_misc_wq, &mddev->event_work);
|
||||||
|
}
|
||||||
|
|
||||||
void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
|
void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
|
||||||
{
|
{
|
||||||
sysfs_notify_dirent_safe(rdev->sysfs_state);
|
sysfs_notify_dirent_safe(rdev->sysfs_state);
|
||||||
|
@ -8642,6 +8684,7 @@ EXPORT_SYMBOL(md_register_thread);
|
||||||
EXPORT_SYMBOL(md_unregister_thread);
|
EXPORT_SYMBOL(md_unregister_thread);
|
||||||
EXPORT_SYMBOL(md_wakeup_thread);
|
EXPORT_SYMBOL(md_wakeup_thread);
|
||||||
EXPORT_SYMBOL(md_check_recovery);
|
EXPORT_SYMBOL(md_check_recovery);
|
||||||
|
EXPORT_SYMBOL(md_reap_sync_thread);
|
||||||
MODULE_LICENSE("GPL");
|
MODULE_LICENSE("GPL");
|
||||||
MODULE_DESCRIPTION("MD RAID framework");
|
MODULE_DESCRIPTION("MD RAID framework");
|
||||||
MODULE_ALIAS("md");
|
MODULE_ALIAS("md");
|
||||||
|
|
|
@ -567,6 +567,7 @@ extern struct md_thread *md_register_thread(
|
||||||
extern void md_unregister_thread(struct md_thread **threadp);
|
extern void md_unregister_thread(struct md_thread **threadp);
|
||||||
extern void md_wakeup_thread(struct md_thread *thread);
|
extern void md_wakeup_thread(struct md_thread *thread);
|
||||||
extern void md_check_recovery(struct mddev *mddev);
|
extern void md_check_recovery(struct mddev *mddev);
|
||||||
|
extern void md_reap_sync_thread(struct mddev *mddev);
|
||||||
extern void md_write_start(struct mddev *mddev, struct bio *bi);
|
extern void md_write_start(struct mddev *mddev, struct bio *bi);
|
||||||
extern void md_write_end(struct mddev *mddev);
|
extern void md_write_end(struct mddev *mddev);
|
||||||
extern void md_done_sync(struct mddev *mddev, int blocks, int ok);
|
extern void md_done_sync(struct mddev *mddev, int blocks, int ok);
|
||||||
|
|
|
@ -981,6 +981,11 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
|
||||||
while (bio) { /* submit pending writes */
|
while (bio) { /* submit pending writes */
|
||||||
struct bio *next = bio->bi_next;
|
struct bio *next = bio->bi_next;
|
||||||
bio->bi_next = NULL;
|
bio->bi_next = NULL;
|
||||||
|
if (unlikely((bio->bi_rw & REQ_DISCARD) &&
|
||||||
|
!blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
|
||||||
|
/* Just ignore it */
|
||||||
|
bio_endio(bio, 0);
|
||||||
|
else
|
||||||
generic_make_request(bio);
|
generic_make_request(bio);
|
||||||
bio = next;
|
bio = next;
|
||||||
}
|
}
|
||||||
|
@ -2901,6 +2906,7 @@ static int stop(struct mddev *mddev)
|
||||||
if (conf->r1bio_pool)
|
if (conf->r1bio_pool)
|
||||||
mempool_destroy(conf->r1bio_pool);
|
mempool_destroy(conf->r1bio_pool);
|
||||||
kfree(conf->mirrors);
|
kfree(conf->mirrors);
|
||||||
|
safe_put_page(conf->tmppage);
|
||||||
kfree(conf->poolinfo);
|
kfree(conf->poolinfo);
|
||||||
kfree(conf);
|
kfree(conf);
|
||||||
mddev->private = NULL;
|
mddev->private = NULL;
|
||||||
|
|
|
@ -1133,6 +1133,11 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
|
||||||
while (bio) { /* submit pending writes */
|
while (bio) { /* submit pending writes */
|
||||||
struct bio *next = bio->bi_next;
|
struct bio *next = bio->bi_next;
|
||||||
bio->bi_next = NULL;
|
bio->bi_next = NULL;
|
||||||
|
if (unlikely((bio->bi_rw & REQ_DISCARD) &&
|
||||||
|
!blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
|
||||||
|
/* Just ignore it */
|
||||||
|
bio_endio(bio, 0);
|
||||||
|
else
|
||||||
generic_make_request(bio);
|
generic_make_request(bio);
|
||||||
bio = next;
|
bio = next;
|
||||||
}
|
}
|
||||||
|
@ -2913,6 +2918,22 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
|
||||||
if (init_resync(conf))
|
if (init_resync(conf))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Allow skipping a full rebuild for incremental assembly
|
||||||
|
* of a clean array, like RAID1 does.
|
||||||
|
*/
|
||||||
|
if (mddev->bitmap == NULL &&
|
||||||
|
mddev->recovery_cp == MaxSector &&
|
||||||
|
!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
|
||||||
|
conf->fullsync == 0) {
|
||||||
|
*skipped = 1;
|
||||||
|
max_sector = mddev->dev_sectors;
|
||||||
|
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
|
||||||
|
test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
|
||||||
|
max_sector = mddev->resync_max_sectors;
|
||||||
|
return max_sector - sector_nr;
|
||||||
|
}
|
||||||
|
|
||||||
skipped:
|
skipped:
|
||||||
max_sector = mddev->dev_sectors;
|
max_sector = mddev->dev_sectors;
|
||||||
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
|
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
|
||||||
|
@ -3810,6 +3831,7 @@ static int stop(struct mddev *mddev)
|
||||||
|
|
||||||
if (conf->r10bio_pool)
|
if (conf->r10bio_pool)
|
||||||
mempool_destroy(conf->r10bio_pool);
|
mempool_destroy(conf->r10bio_pool);
|
||||||
|
safe_put_page(conf->tmppage);
|
||||||
kfree(conf->mirrors);
|
kfree(conf->mirrors);
|
||||||
kfree(conf);
|
kfree(conf);
|
||||||
mddev->private = NULL;
|
mddev->private = NULL;
|
||||||
|
|
|
@ -1887,8 +1887,15 @@ static void raid5_end_write_request(struct bio *bi, int error)
|
||||||
&rdev->mddev->recovery);
|
&rdev->mddev->recovery);
|
||||||
} else if (is_badblock(rdev, sh->sector,
|
} else if (is_badblock(rdev, sh->sector,
|
||||||
STRIPE_SECTORS,
|
STRIPE_SECTORS,
|
||||||
&first_bad, &bad_sectors))
|
&first_bad, &bad_sectors)) {
|
||||||
set_bit(R5_MadeGood, &sh->dev[i].flags);
|
set_bit(R5_MadeGood, &sh->dev[i].flags);
|
||||||
|
if (test_bit(R5_ReadError, &sh->dev[i].flags))
|
||||||
|
/* That was a successful write so make
|
||||||
|
* sure it looks like we already did
|
||||||
|
* a re-write.
|
||||||
|
*/
|
||||||
|
set_bit(R5_ReWrite, &sh->dev[i].flags);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
rdev_dec_pending(rdev, conf->mddev);
|
rdev_dec_pending(rdev, conf->mddev);
|
||||||
|
|
||||||
|
@ -4672,9 +4679,10 @@ static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int
|
||||||
*skipped = 1;
|
*skipped = 1;
|
||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) &&
|
if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
|
||||||
!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
|
!conf->fullsync &&
|
||||||
!conf->fullsync && sync_blocks >= STRIPE_SECTORS) {
|
!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) &&
|
||||||
|
sync_blocks >= STRIPE_SECTORS) {
|
||||||
/* we can skip this block, and probably more */
|
/* we can skip this block, and probably more */
|
||||||
sync_blocks /= STRIPE_SECTORS;
|
sync_blocks /= STRIPE_SECTORS;
|
||||||
*skipped = 1;
|
*skipped = 1;
|
||||||
|
|
Loading…
Add table
Reference in a new issue