From 6edf1ad773206e5e1ec76b6b8492c83eecae031d Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 24 Aug 2017 00:03:43 -0700 Subject: [PATCH 001/189] UPSTREAM: loop: add ioctl for changing logical block size This is a different approach from the first attempt in f2c6df7dbf9a ("loop: support 4k physical blocksize"). Rather than extending LOOP_{GET,SET}_STATUS, add a separate ioctl just for setting the block size. Bug: 117823094 Change-Id: I8e69b8839d7fee3be564cbfce1797ce108e1aa1e Reviewed-by: Ming Lei Reviewed-by: Hannes Reinecke Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe (cherry picked from commit 89e4fdecb51cf5535867026274bc97de9480ade5) Signed-off-by: Martijn Coenen --- drivers/block/loop.c | 24 ++++++++++++++++++++++++ include/uapi/linux/loop.h | 1 + 2 files changed, 25 insertions(+) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index da3902ac16c8..b98797682fe7 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1070,6 +1070,7 @@ static int loop_clr_fd(struct loop_device *lo) memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE); memset(lo->lo_crypt_name, 0, LO_NAME_SIZE); memset(lo->lo_file_name, 0, LO_NAME_SIZE); + blk_queue_logical_block_size(lo->lo_queue, 512); if (bdev) { bdput(bdev); invalidate_bdev(bdev); @@ -1355,6 +1356,24 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg) return error; } +static int loop_set_block_size(struct loop_device *lo, unsigned long arg) +{ + if (lo->lo_state != Lo_bound) + return -ENXIO; + + if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg)) + return -EINVAL; + + blk_mq_freeze_queue(lo->lo_queue); + + blk_queue_logical_block_size(lo->lo_queue, arg); + loop_update_dio(lo); + + blk_mq_unfreeze_queue(lo->lo_queue); + + return 0; +} + static int lo_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { @@ -1403,6 +1422,11 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) err = loop_set_dio(lo, arg); break; + case LOOP_SET_BLOCK_SIZE: + err = -EPERM; + if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) + err = loop_set_block_size(lo, arg); + break; default: err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL; } diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h index c8125ec1f4f2..23158dbe2424 100644 --- a/include/uapi/linux/loop.h +++ b/include/uapi/linux/loop.h @@ -88,6 +88,7 @@ struct loop_info64 { #define LOOP_CHANGE_FD 0x4C06 #define LOOP_SET_CAPACITY 0x4C07 #define LOOP_SET_DIRECT_IO 0x4C08 +#define LOOP_SET_BLOCK_SIZE 0x4C09 /* /dev/loop-control interface */ #define LOOP_CTL_ADD 0x4C80 From 8567ea359cabc9805fb9fae457c0f8df3a2b9341 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 31 Aug 2017 22:09:45 -0700 Subject: [PATCH 002/189] BACKPORT: block/loop: set hw_sectors Loop can handle any size of request. Limiting it to 255 sectors just burns the CPU for bio split and request merge for underlayer disk and also cause bad fs block allocation in directio mode. Bug: 117823094 Change-Id: Ic4957181433c5a0d15f4cfdbf69dc5558d6dc5bd Reviewed-by: Omar Sandoval Reviewed-by: Ming Lei Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe (cherry picked from commit 54bb0ade6627a183c211345761ec46e4bf0048fe) Signed-off-by: Martijn Coenen --- drivers/block/loop.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index b98797682fe7..3dc48abd22e8 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1813,6 +1813,7 @@ static int loop_add(struct loop_device **l, int i) } lo->lo_queue->queuedata = lo; + blk_queue_max_hw_sectors(lo->lo_queue, BLK_DEF_MAX_SECTORS); /* * It doesn't make sense to enable merge because the I/O * submitted to backing file is handled page by page. From c82807c7dd9f0125b3eaf6aefae31d63c260a60b Mon Sep 17 00:00:00 2001 From: Evan Green Date: Mon, 2 Jul 2018 16:03:46 -0700 Subject: [PATCH 003/189] UPSTREAM: loop: Add LOOP_SET_BLOCK_SIZE in compat ioctl This change adds LOOP_SET_BLOCK_SIZE as one of the supported ioctls in lo_compat_ioctl. It only takes an unsigned long argument, and in practice a 32-bit value works fine. Bug: 117823094 Change-Id: I0061a082eb2632c47b7d66f35f2c909d33ff1653 Reviewed-by: Omar Sandoval Signed-off-by: Evan Green Signed-off-by: Jens Axboe (cherry picked from commit 9fea4b395260175de4016b42982f45a3e6e03d0b) Signed-off-by: Martijn Coenen --- drivers/block/loop.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 3dc48abd22e8..7eac581912f0 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1581,6 +1581,7 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, arg = (unsigned long) compat_ptr(arg); case LOOP_SET_FD: case LOOP_CHANGE_FD: + case LOOP_SET_BLOCK_SIZE: err = lo_ioctl(bdev, mode, cmd, arg); break; default: From 076c36fce1ea0664db7ecbe9ec73a19c991c9501 Mon Sep 17 00:00:00 2001 From: Alistair Strachan Date: Mon, 15 Oct 2018 14:53:36 -0700 Subject: [PATCH 004/189] Revert "fscrypt: add Speck128/256 support" This reverts commit eb13e0b69296ad1d3a9a3fa0cb6570aaf99f9f0c. Resolves conflicts with a later CL, e7724207f71e "fscrypt: log the crypto algorithm implementations". Also leaves the include/uapi/linux/fs.h constants in place to prevent future accidental re-use. Bug: 116008047 Change-Id: I2d64d8d3e384400b7bdfc06a353c3844d4ebb377 Signed-off-by: Alistair Strachan --- Documentation/filesystems/fscrypt.rst | 626 -------------------------- fs/crypto/fscrypt_private.h | 4 - fs/crypto/keyinfo.c | 11 +- 3 files changed, 1 insertion(+), 640 deletions(-) delete mode 100644 Documentation/filesystems/fscrypt.rst diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst deleted file mode 100644 index 48b424de85bb..000000000000 --- a/Documentation/filesystems/fscrypt.rst +++ /dev/null @@ -1,626 +0,0 @@ -===================================== -Filesystem-level encryption (fscrypt) -===================================== - -Introduction -============ - -fscrypt is a library which filesystems can hook into to support -transparent encryption of files and directories. - -Note: "fscrypt" in this document refers to the kernel-level portion, -implemented in ``fs/crypto/``, as opposed to the userspace tool -`fscrypt `_. This document only -covers the kernel-level portion. For command-line examples of how to -use encryption, see the documentation for the userspace tool `fscrypt -`_. Also, it is recommended to use -the fscrypt userspace tool, or other existing userspace tools such as -`fscryptctl `_ or `Android's key -management system -`_, over -using the kernel's API directly. Using existing tools reduces the -chance of introducing your own security bugs. (Nevertheless, for -completeness this documentation covers the kernel's API anyway.) - -Unlike dm-crypt, fscrypt operates at the filesystem level rather than -at the block device level. This allows it to encrypt different files -with different keys and to have unencrypted files on the same -filesystem. This is useful for multi-user systems where each user's -data-at-rest needs to be cryptographically isolated from the others. -However, except for filenames, fscrypt does not encrypt filesystem -metadata. - -Unlike eCryptfs, which is a stacked filesystem, fscrypt is integrated -directly into supported filesystems --- currently ext4, F2FS, and -UBIFS. This allows encrypted files to be read and written without -caching both the decrypted and encrypted pages in the pagecache, -thereby nearly halving the memory used and bringing it in line with -unencrypted files. Similarly, half as many dentries and inodes are -needed. eCryptfs also limits encrypted filenames to 143 bytes, -causing application compatibility issues; fscrypt allows the full 255 -bytes (NAME_MAX). Finally, unlike eCryptfs, the fscrypt API can be -used by unprivileged users, with no need to mount anything. - -fscrypt does not support encrypting files in-place. Instead, it -supports marking an empty directory as encrypted. Then, after -userspace provides the key, all regular files, directories, and -symbolic links created in that directory tree are transparently -encrypted. - -Threat model -============ - -Offline attacks ---------------- - -Provided that userspace chooses a strong encryption key, fscrypt -protects the confidentiality of file contents and filenames in the -event of a single point-in-time permanent offline compromise of the -block device content. fscrypt does not protect the confidentiality of -non-filename metadata, e.g. file sizes, file permissions, file -timestamps, and extended attributes. Also, the existence and location -of holes (unallocated blocks which logically contain all zeroes) in -files is not protected. - -fscrypt is not guaranteed to protect confidentiality or authenticity -if an attacker is able to manipulate the filesystem offline prior to -an authorized user later accessing the filesystem. - -Online attacks --------------- - -fscrypt (and storage encryption in general) can only provide limited -protection, if any at all, against online attacks. In detail: - -fscrypt is only resistant to side-channel attacks, such as timing or -electromagnetic attacks, to the extent that the underlying Linux -Cryptographic API algorithms are. If a vulnerable algorithm is used, -such as a table-based implementation of AES, it may be possible for an -attacker to mount a side channel attack against the online system. -Side channel attacks may also be mounted against applications -consuming decrypted data. - -After an encryption key has been provided, fscrypt is not designed to -hide the plaintext file contents or filenames from other users on the -same system, regardless of the visibility of the keyring key. -Instead, existing access control mechanisms such as file mode bits, -POSIX ACLs, LSMs, or mount namespaces should be used for this purpose. -Also note that as long as the encryption keys are *anywhere* in -memory, an online attacker can necessarily compromise them by mounting -a physical attack or by exploiting any kernel security vulnerability -which provides an arbitrary memory read primitive. - -While it is ostensibly possible to "evict" keys from the system, -recently accessed encrypted files will remain accessible at least -until the filesystem is unmounted or the VFS caches are dropped, e.g. -using ``echo 2 > /proc/sys/vm/drop_caches``. Even after that, if the -RAM is compromised before being powered off, it will likely still be -possible to recover portions of the plaintext file contents, if not -some of the encryption keys as well. (Since Linux v4.12, all -in-kernel keys related to fscrypt are sanitized before being freed. -However, userspace would need to do its part as well.) - -Currently, fscrypt does not prevent a user from maliciously providing -an incorrect key for another user's existing encrypted files. A -protection against this is planned. - -Key hierarchy -============= - -Master Keys ------------ - -Each encrypted directory tree is protected by a *master key*. Master -keys can be up to 64 bytes long, and must be at least as long as the -greater of the key length needed by the contents and filenames -encryption modes being used. For example, if AES-256-XTS is used for -contents encryption, the master key must be 64 bytes (512 bits). Note -that the XTS mode is defined to require a key twice as long as that -required by the underlying block cipher. - -To "unlock" an encrypted directory tree, userspace must provide the -appropriate master key. There can be any number of master keys, each -of which protects any number of directory trees on any number of -filesystems. - -Userspace should generate master keys either using a cryptographically -secure random number generator, or by using a KDF (Key Derivation -Function). Note that whenever a KDF is used to "stretch" a -lower-entropy secret such as a passphrase, it is critical that a KDF -designed for this purpose be used, such as scrypt, PBKDF2, or Argon2. - -Per-file keys -------------- - -Master keys are not used to encrypt file contents or names directly. -Instead, a unique key is derived for each encrypted file, including -each regular file, directory, and symbolic link. This has several -advantages: - -- In cryptosystems, the same key material should never be used for - different purposes. Using the master key as both an XTS key for - contents encryption and as a CTS-CBC key for filenames encryption - would violate this rule. -- Per-file keys simplify the choice of IVs (Initialization Vectors) - for contents encryption. Without per-file keys, to ensure IV - uniqueness both the inode and logical block number would need to be - encoded in the IVs. This would make it impossible to renumber - inodes, which e.g. ``resize2fs`` can do when resizing an ext4 - filesystem. With per-file keys, it is sufficient to encode just the - logical block number in the IVs. -- Per-file keys strengthen the encryption of filenames, where IVs are - reused out of necessity. With a unique key per directory, IV reuse - is limited to within a single directory. -- Per-file keys allow individual files to be securely erased simply by - securely erasing their keys. (Not yet implemented.) - -A KDF (Key Derivation Function) is used to derive per-file keys from -the master key. This is done instead of wrapping a randomly-generated -key for each file because it reduces the size of the encryption xattr, -which for some filesystems makes the xattr more likely to fit in-line -in the filesystem's inode table. With a KDF, only a 16-byte nonce is -required --- long enough to make key reuse extremely unlikely. A -wrapped key, on the other hand, would need to be up to 64 bytes --- -the length of an AES-256-XTS key. Furthermore, currently there is no -requirement to support unlocking a file with multiple alternative -master keys or to support rotating master keys. Instead, the master -keys may be wrapped in userspace, e.g. as done by the `fscrypt -`_ tool. - -The current KDF encrypts the master key using the 16-byte nonce as an -AES-128-ECB key. The output is used as the derived key. If the -output is longer than needed, then it is truncated to the needed -length. Truncation is the norm for directories and symlinks, since -those use the CTS-CBC encryption mode which requires a key half as -long as that required by the XTS encryption mode. - -Note: this KDF meets the primary security requirement, which is to -produce unique derived keys that preserve the entropy of the master -key, assuming that the master key is already a good pseudorandom key. -However, it is nonstandard and has some problems such as being -reversible, so it is generally considered to be a mistake! It may be -replaced with HKDF or another more standard KDF in the future. - -Encryption modes and usage -========================== - -fscrypt allows one encryption mode to be specified for file contents -and one encryption mode to be specified for filenames. Different -directory trees are permitted to use different encryption modes. -Currently, the following pairs of encryption modes are supported: - -- AES-256-XTS for contents and AES-256-CTS-CBC for filenames -- AES-128-CBC for contents and AES-128-CTS-CBC for filenames -- Speck128/256-XTS for contents and Speck128/256-CTS-CBC for filenames - -It is strongly recommended to use AES-256-XTS for contents encryption. -AES-128-CBC was added only for low-powered embedded devices with -crypto accelerators such as CAAM or CESA that do not support XTS. - -Similarly, Speck128/256 support was only added for older or low-end -CPUs which cannot do AES fast enough -- especially ARM CPUs which have -NEON instructions but not the Cryptography Extensions -- and for which -it would not otherwise be feasible to use encryption at all. It is -not recommended to use Speck on CPUs that have AES instructions. -Speck support is only available if it has been enabled in the crypto -API via CONFIG_CRYPTO_SPECK. Also, on ARM platforms, to get -acceptable performance CONFIG_CRYPTO_SPECK_NEON must be enabled. - -New encryption modes can be added relatively easily, without changes -to individual filesystems. However, authenticated encryption (AE) -modes are not currently supported because of the difficulty of dealing -with ciphertext expansion. - -For file contents, each filesystem block is encrypted independently. -Currently, only the case where the filesystem block size is equal to -the system's page size (usually 4096 bytes) is supported. With the -XTS mode of operation (recommended), the logical block number within -the file is used as the IV. With the CBC mode of operation (not -recommended), ESSIV is used; specifically, the IV for CBC is the -logical block number encrypted with AES-256, where the AES-256 key is -the SHA-256 hash of the inode's data encryption key. - -For filenames, the full filename is encrypted at once. Because of the -requirements to retain support for efficient directory lookups and -filenames of up to 255 bytes, a constant initialization vector (IV) is -used. However, each encrypted directory uses a unique key, which -limits IV reuse to within a single directory. Note that IV reuse in -the context of CTS-CBC encryption means that when the original -filenames share a common prefix at least as long as the cipher block -size (16 bytes for AES), the corresponding encrypted filenames will -also share a common prefix. This is undesirable; it may be fixed in -the future by switching to an encryption mode that is a strong -pseudorandom permutation on arbitrary-length messages, e.g. the HEH -(Hash-Encrypt-Hash) mode. - -Since filenames are encrypted with the CTS-CBC mode of operation, the -plaintext and ciphertext filenames need not be multiples of the AES -block size, i.e. 16 bytes. However, the minimum size that can be -encrypted is 16 bytes, so shorter filenames are NUL-padded to 16 bytes -before being encrypted. In addition, to reduce leakage of filename -lengths via their ciphertexts, all filenames are NUL-padded to the -next 4, 8, 16, or 32-byte boundary (configurable). 32 is recommended -since this provides the best confidentiality, at the cost of making -directory entries consume slightly more space. Note that since NUL -(``\0``) is not otherwise a valid character in filenames, the padding -will never produce duplicate plaintexts. - -Symbolic link targets are considered a type of filename and are -encrypted in the same way as filenames in directory entries. Each -symlink also uses a unique key; hence, the hardcoded IV is not a -problem for symlinks. - -User API -======== - -Setting an encryption policy ----------------------------- - -The FS_IOC_SET_ENCRYPTION_POLICY ioctl sets an encryption policy on an -empty directory or verifies that a directory or regular file already -has the specified encryption policy. It takes in a pointer to a -:c:type:`struct fscrypt_policy`, defined as follows:: - - #define FS_KEY_DESCRIPTOR_SIZE 8 - - struct fscrypt_policy { - __u8 version; - __u8 contents_encryption_mode; - __u8 filenames_encryption_mode; - __u8 flags; - __u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE]; - }; - -This structure must be initialized as follows: - -- ``version`` must be 0. - -- ``contents_encryption_mode`` and ``filenames_encryption_mode`` must - be set to constants from ```` which identify the - encryption modes to use. If unsure, use - FS_ENCRYPTION_MODE_AES_256_XTS (1) for ``contents_encryption_mode`` - and FS_ENCRYPTION_MODE_AES_256_CTS (4) for - ``filenames_encryption_mode``. - -- ``flags`` must be set to a value from ```` which - identifies the amount of NUL-padding to use when encrypting - filenames. If unsure, use FS_POLICY_FLAGS_PAD_32 (0x3). - -- ``master_key_descriptor`` specifies how to find the master key in - the keyring; see `Adding keys`_. It is up to userspace to choose a - unique ``master_key_descriptor`` for each master key. The e4crypt - and fscrypt tools use the first 8 bytes of - ``SHA-512(SHA-512(master_key))``, but this particular scheme is not - required. Also, the master key need not be in the keyring yet when - FS_IOC_SET_ENCRYPTION_POLICY is executed. However, it must be added - before any files can be created in the encrypted directory. - -If the file is not yet encrypted, then FS_IOC_SET_ENCRYPTION_POLICY -verifies that the file is an empty directory. If so, the specified -encryption policy is assigned to the directory, turning it into an -encrypted directory. After that, and after providing the -corresponding master key as described in `Adding keys`_, all regular -files, directories (recursively), and symlinks created in the -directory will be encrypted, inheriting the same encryption policy. -The filenames in the directory's entries will be encrypted as well. - -Alternatively, if the file is already encrypted, then -FS_IOC_SET_ENCRYPTION_POLICY validates that the specified encryption -policy exactly matches the actual one. If they match, then the ioctl -returns 0. Otherwise, it fails with EEXIST. This works on both -regular files and directories, including nonempty directories. - -Note that the ext4 filesystem does not allow the root directory to be -encrypted, even if it is empty. Users who want to encrypt an entire -filesystem with one key should consider using dm-crypt instead. - -FS_IOC_SET_ENCRYPTION_POLICY can fail with the following errors: - -- ``EACCES``: the file is not owned by the process's uid, nor does the - process have the CAP_FOWNER capability in a namespace with the file - owner's uid mapped -- ``EEXIST``: the file is already encrypted with an encryption policy - different from the one specified -- ``EINVAL``: an invalid encryption policy was specified (invalid - version, mode(s), or flags) -- ``ENOTDIR``: the file is unencrypted and is a regular file, not a - directory -- ``ENOTEMPTY``: the file is unencrypted and is a nonempty directory -- ``ENOTTY``: this type of filesystem does not implement encryption -- ``EOPNOTSUPP``: the kernel was not configured with encryption - support for this filesystem, or the filesystem superblock has not - had encryption enabled on it. (For example, to use encryption on an - ext4 filesystem, CONFIG_EXT4_ENCRYPTION must be enabled in the - kernel config, and the superblock must have had the "encrypt" - feature flag enabled using ``tune2fs -O encrypt`` or ``mkfs.ext4 -O - encrypt``.) -- ``EPERM``: this directory may not be encrypted, e.g. because it is - the root directory of an ext4 filesystem -- ``EROFS``: the filesystem is readonly - -Getting an encryption policy ----------------------------- - -The FS_IOC_GET_ENCRYPTION_POLICY ioctl retrieves the :c:type:`struct -fscrypt_policy`, if any, for a directory or regular file. See above -for the struct definition. No additional permissions are required -beyond the ability to open the file. - -FS_IOC_GET_ENCRYPTION_POLICY can fail with the following errors: - -- ``EINVAL``: the file is encrypted, but it uses an unrecognized - encryption context format -- ``ENODATA``: the file is not encrypted -- ``ENOTTY``: this type of filesystem does not implement encryption -- ``EOPNOTSUPP``: the kernel was not configured with encryption - support for this filesystem - -Note: if you only need to know whether a file is encrypted or not, on -most filesystems it is also possible to use the FS_IOC_GETFLAGS ioctl -and check for FS_ENCRYPT_FL, or to use the statx() system call and -check for STATX_ATTR_ENCRYPTED in stx_attributes. - -Getting the per-filesystem salt -------------------------------- - -Some filesystems, such as ext4 and F2FS, also support the deprecated -ioctl FS_IOC_GET_ENCRYPTION_PWSALT. This ioctl retrieves a randomly -generated 16-byte value stored in the filesystem superblock. This -value is intended to used as a salt when deriving an encryption key -from a passphrase or other low-entropy user credential. - -FS_IOC_GET_ENCRYPTION_PWSALT is deprecated. Instead, prefer to -generate and manage any needed salt(s) in userspace. - -Adding keys ------------ - -To provide a master key, userspace must add it to an appropriate -keyring using the add_key() system call (see: -``Documentation/security/keys/core.rst``). The key type must be -"logon"; keys of this type are kept in kernel memory and cannot be -read back by userspace. The key description must be "fscrypt:" -followed by the 16-character lower case hex representation of the -``master_key_descriptor`` that was set in the encryption policy. The -key payload must conform to the following structure:: - - #define FS_MAX_KEY_SIZE 64 - - struct fscrypt_key { - u32 mode; - u8 raw[FS_MAX_KEY_SIZE]; - u32 size; - }; - -``mode`` is ignored; just set it to 0. The actual key is provided in -``raw`` with ``size`` indicating its size in bytes. That is, the -bytes ``raw[0..size-1]`` (inclusive) are the actual key. - -The key description prefix "fscrypt:" may alternatively be replaced -with a filesystem-specific prefix such as "ext4:". However, the -filesystem-specific prefixes are deprecated and should not be used in -new programs. - -There are several different types of keyrings in which encryption keys -may be placed, such as a session keyring, a user session keyring, or a -user keyring. Each key must be placed in a keyring that is "attached" -to all processes that might need to access files encrypted with it, in -the sense that request_key() will find the key. Generally, if only -processes belonging to a specific user need to access a given -encrypted directory and no session keyring has been installed, then -that directory's key should be placed in that user's user session -keyring or user keyring. Otherwise, a session keyring should be -installed if needed, and the key should be linked into that session -keyring, or in a keyring linked into that session keyring. - -Note: introducing the complex visibility semantics of keyrings here -was arguably a mistake --- especially given that by design, after any -process successfully opens an encrypted file (thereby setting up the -per-file key), possessing the keyring key is not actually required for -any process to read/write the file until its in-memory inode is -evicted. In the future there probably should be a way to provide keys -directly to the filesystem instead, which would make the intended -semantics clearer. - -Access semantics -================ - -With the key ------------- - -With the encryption key, encrypted regular files, directories, and -symlinks behave very similarly to their unencrypted counterparts --- -after all, the encryption is intended to be transparent. However, -astute users may notice some differences in behavior: - -- Unencrypted files, or files encrypted with a different encryption - policy (i.e. different key, modes, or flags), cannot be renamed or - linked into an encrypted directory; see `Encryption policy - enforcement`_. Attempts to do so will fail with EPERM. However, - encrypted files can be renamed within an encrypted directory, or - into an unencrypted directory. - -- Direct I/O is not supported on encrypted files. Attempts to use - direct I/O on such files will fall back to buffered I/O. - -- The fallocate operations FALLOC_FL_COLLAPSE_RANGE, - FALLOC_FL_INSERT_RANGE, and FALLOC_FL_ZERO_RANGE are not supported - on encrypted files and will fail with EOPNOTSUPP. - -- Online defragmentation of encrypted files is not supported. The - EXT4_IOC_MOVE_EXT and F2FS_IOC_MOVE_RANGE ioctls will fail with - EOPNOTSUPP. - -- The ext4 filesystem does not support data journaling with encrypted - regular files. It will fall back to ordered data mode instead. - -- DAX (Direct Access) is not supported on encrypted files. - -- The st_size of an encrypted symlink will not necessarily give the - length of the symlink target as required by POSIX. It will actually - give the length of the ciphertext, which will be slightly longer - than the plaintext due to NUL-padding and an extra 2-byte overhead. - -- The maximum length of an encrypted symlink is 2 bytes shorter than - the maximum length of an unencrypted symlink. For example, on an - EXT4 filesystem with a 4K block size, unencrypted symlinks can be up - to 4095 bytes long, while encrypted symlinks can only be up to 4093 - bytes long (both lengths excluding the terminating null). - -Note that mmap *is* supported. This is possible because the pagecache -for an encrypted file contains the plaintext, not the ciphertext. - -Without the key ---------------- - -Some filesystem operations may be performed on encrypted regular -files, directories, and symlinks even before their encryption key has -been provided: - -- File metadata may be read, e.g. using stat(). - -- Directories may be listed, in which case the filenames will be - listed in an encoded form derived from their ciphertext. The - current encoding algorithm is described in `Filename hashing and - encoding`_. The algorithm is subject to change, but it is - guaranteed that the presented filenames will be no longer than - NAME_MAX bytes, will not contain the ``/`` or ``\0`` characters, and - will uniquely identify directory entries. - - The ``.`` and ``..`` directory entries are special. They are always - present and are not encrypted or encoded. - -- Files may be deleted. That is, nondirectory files may be deleted - with unlink() as usual, and empty directories may be deleted with - rmdir() as usual. Therefore, ``rm`` and ``rm -r`` will work as - expected. - -- Symlink targets may be read and followed, but they will be presented - in encrypted form, similar to filenames in directories. Hence, they - are unlikely to point to anywhere useful. - -Without the key, regular files cannot be opened or truncated. -Attempts to do so will fail with ENOKEY. This implies that any -regular file operations that require a file descriptor, such as -read(), write(), mmap(), fallocate(), and ioctl(), are also forbidden. - -Also without the key, files of any type (including directories) cannot -be created or linked into an encrypted directory, nor can a name in an -encrypted directory be the source or target of a rename, nor can an -O_TMPFILE temporary file be created in an encrypted directory. All -such operations will fail with ENOKEY. - -It is not currently possible to backup and restore encrypted files -without the encryption key. This would require special APIs which -have not yet been implemented. - -Encryption policy enforcement -============================= - -After an encryption policy has been set on a directory, all regular -files, directories, and symbolic links created in that directory -(recursively) will inherit that encryption policy. Special files --- -that is, named pipes, device nodes, and UNIX domain sockets --- will -not be encrypted. - -Except for those special files, it is forbidden to have unencrypted -files, or files encrypted with a different encryption policy, in an -encrypted directory tree. Attempts to link or rename such a file into -an encrypted directory will fail with EPERM. This is also enforced -during ->lookup() to provide limited protection against offline -attacks that try to disable or downgrade encryption in known locations -where applications may later write sensitive data. It is recommended -that systems implementing a form of "verified boot" take advantage of -this by validating all top-level encryption policies prior to access. - -Implementation details -====================== - -Encryption context ------------------- - -An encryption policy is represented on-disk by a :c:type:`struct -fscrypt_context`. It is up to individual filesystems to decide where -to store it, but normally it would be stored in a hidden extended -attribute. It should *not* be exposed by the xattr-related system -calls such as getxattr() and setxattr() because of the special -semantics of the encryption xattr. (In particular, there would be -much confusion if an encryption policy were to be added to or removed -from anything other than an empty directory.) The struct is defined -as follows:: - - #define FS_KEY_DESCRIPTOR_SIZE 8 - #define FS_KEY_DERIVATION_NONCE_SIZE 16 - - struct fscrypt_context { - u8 format; - u8 contents_encryption_mode; - u8 filenames_encryption_mode; - u8 flags; - u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE]; - u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE]; - }; - -Note that :c:type:`struct fscrypt_context` contains the same -information as :c:type:`struct fscrypt_policy` (see `Setting an -encryption policy`_), except that :c:type:`struct fscrypt_context` -also contains a nonce. The nonce is randomly generated by the kernel -and is used to derive the inode's encryption key as described in -`Per-file keys`_. - -Data path changes ------------------ - -For the read path (->readpage()) of regular files, filesystems can -read the ciphertext into the page cache and decrypt it in-place. The -page lock must be held until decryption has finished, to prevent the -page from becoming visible to userspace prematurely. - -For the write path (->writepage()) of regular files, filesystems -cannot encrypt data in-place in the page cache, since the cached -plaintext must be preserved. Instead, filesystems must encrypt into a -temporary buffer or "bounce page", then write out the temporary -buffer. Some filesystems, such as UBIFS, already use temporary -buffers regardless of encryption. Other filesystems, such as ext4 and -F2FS, have to allocate bounce pages specially for encryption. - -Filename hashing and encoding ------------------------------ - -Modern filesystems accelerate directory lookups by using indexed -directories. An indexed directory is organized as a tree keyed by -filename hashes. When a ->lookup() is requested, the filesystem -normally hashes the filename being looked up so that it can quickly -find the corresponding directory entry, if any. - -With encryption, lookups must be supported and efficient both with and -without the encryption key. Clearly, it would not work to hash the -plaintext filenames, since the plaintext filenames are unavailable -without the key. (Hashing the plaintext filenames would also make it -impossible for the filesystem's fsck tool to optimize encrypted -directories.) Instead, filesystems hash the ciphertext filenames, -i.e. the bytes actually stored on-disk in the directory entries. When -asked to do a ->lookup() with the key, the filesystem just encrypts -the user-supplied name to get the ciphertext. - -Lookups without the key are more complicated. The raw ciphertext may -contain the ``\0`` and ``/`` characters, which are illegal in -filenames. Therefore, readdir() must base64-encode the ciphertext for -presentation. For most filenames, this works fine; on ->lookup(), the -filesystem just base64-decodes the user-supplied name to get back to -the raw ciphertext. - -However, for very long filenames, base64 encoding would cause the -filename length to exceed NAME_MAX. To prevent this, readdir() -actually presents long filenames in an abbreviated form which encodes -a strong "hash" of the ciphertext filename, along with the optional -filesystem-specific hash(es) needed for directory lookups. This -allows the filesystem to still, with a high degree of confidence, map -the filename given in ->lookup() back to a particular directory entry -that was previously listed by readdir(). See :c:type:`struct -fscrypt_digested_name` in the source for more details. - -Note that the precise way that filenames are presented to userspace -without the key is subject to change in the future. It is only meant -as a way to temporarily present valid filenames so that commands like -``rm -r`` work as expected on encrypted directories. diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index ea372cd53ab6..92c6c0ace1b1 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -93,10 +93,6 @@ static inline bool fscrypt_valid_enc_modes(u32 contents_mode, filenames_mode == FS_ENCRYPTION_MODE_AES_256_CTS) return true; - if (contents_mode == FS_ENCRYPTION_MODE_SPECK128_256_XTS && - filenames_mode == FS_ENCRYPTION_MODE_SPECK128_256_CTS) - return true; - return false; } diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index 382e828f2f9a..b13e968273a4 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -153,6 +153,7 @@ static struct fscrypt_mode { int keysize; bool logged_impl_name; } available_modes[] = { +<<<<<<< HEAD [FS_ENCRYPTION_MODE_AES_256_XTS] = { .friendly_name = "AES-256-XTS", .cipher_str = "xts(aes)", @@ -173,16 +174,6 @@ static struct fscrypt_mode { .cipher_str = "cts(cbc(aes))", .keysize = 16, }, - [FS_ENCRYPTION_MODE_SPECK128_256_XTS] = { - .friendly_name = "Speck128/256-XTS", - .cipher_str = "xts(speck128)", - .keysize = 64, - }, - [FS_ENCRYPTION_MODE_SPECK128_256_CTS] = { - .friendly_name = "Speck128/256-CTS-CBC", - .cipher_str = "cts(cbc(speck128))", - .keysize = 32, - }, }; static struct fscrypt_mode * From 08400aec8d4a5f438e71502b4f7745dd5de1caa8 Mon Sep 17 00:00:00 2001 From: Alistair Strachan Date: Mon, 22 Oct 2018 14:13:16 -0700 Subject: [PATCH 005/189] Revert "BACKPORT, FROMLIST: crypto: arm64/speck - add NEON-accelerated implementation of Speck-XTS" This reverts commit 37343fde0c4a8645e9530540bb8c10d18db3875a. Bug: 116008047 Change-Id: Ic47509910c162a35f6fba10a196f4369299451ac Signed-off-by: Alistair Strachan --- arch/arm64/crypto/Kconfig | 7 - arch/arm64/crypto/Makefile | 3 - arch/arm64/crypto/speck-neon-core.S | 352 ---------------------------- arch/arm64/crypto/speck-neon-glue.c | 308 ------------------------ 4 files changed, 670 deletions(-) delete mode 100644 arch/arm64/crypto/speck-neon-core.S delete mode 100644 arch/arm64/crypto/speck-neon-glue.c diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 487cd382d333..de1aab4b5da8 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -58,11 +58,4 @@ config CRYPTO_CRC32_ARM64 tristate "CRC32 and CRC32C using optional ARMv8 instructions" depends on ARM64 select CRYPTO_HASH - -config CRYPTO_SPECK_NEON - tristate "NEON accelerated Speck cipher algorithms" - depends on KERNEL_MODE_NEON - select CRYPTO_BLKCIPHER - select CRYPTO_GF128MUL - select CRYPTO_SPECK endif diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index 28a71246e0a3..f0a8f2475ea3 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -32,9 +32,6 @@ aes-ce-blk-y := aes-glue-ce.o aes-ce.o obj-$(CONFIG_CRYPTO_AES_ARM64_NEON_BLK) += aes-neon-blk.o aes-neon-blk-y := aes-glue-neon.o aes-neon.o -obj-$(CONFIG_CRYPTO_SPECK_NEON) += speck-neon.o -speck-neon-y := speck-neon-core.o speck-neon-glue.o - AFLAGS_aes-ce.o := -DINTERLEAVE=4 AFLAGS_aes-neon.o := -DINTERLEAVE=4 diff --git a/arch/arm64/crypto/speck-neon-core.S b/arch/arm64/crypto/speck-neon-core.S deleted file mode 100644 index b14463438b09..000000000000 --- a/arch/arm64/crypto/speck-neon-core.S +++ /dev/null @@ -1,352 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * ARM64 NEON-accelerated implementation of Speck128-XTS and Speck64-XTS - * - * Copyright (c) 2018 Google, Inc - * - * Author: Eric Biggers - */ - -#include - - .text - - // arguments - ROUND_KEYS .req x0 // const {u64,u32} *round_keys - NROUNDS .req w1 // int nrounds - NROUNDS_X .req x1 - DST .req x2 // void *dst - SRC .req x3 // const void *src - NBYTES .req w4 // unsigned int nbytes - TWEAK .req x5 // void *tweak - - // registers which hold the data being encrypted/decrypted - // (underscores avoid a naming collision with ARM64 registers x0-x3) - X_0 .req v0 - Y_0 .req v1 - X_1 .req v2 - Y_1 .req v3 - X_2 .req v4 - Y_2 .req v5 - X_3 .req v6 - Y_3 .req v7 - - // the round key, duplicated in all lanes - ROUND_KEY .req v8 - - // index vector for tbl-based 8-bit rotates - ROTATE_TABLE .req v9 - ROTATE_TABLE_Q .req q9 - - // temporary registers - TMP0 .req v10 - TMP1 .req v11 - TMP2 .req v12 - TMP3 .req v13 - - // multiplication table for updating XTS tweaks - GFMUL_TABLE .req v14 - GFMUL_TABLE_Q .req q14 - - // next XTS tweak value(s) - TWEAKV_NEXT .req v15 - - // XTS tweaks for the blocks currently being encrypted/decrypted - TWEAKV0 .req v16 - TWEAKV1 .req v17 - TWEAKV2 .req v18 - TWEAKV3 .req v19 - TWEAKV4 .req v20 - TWEAKV5 .req v21 - TWEAKV6 .req v22 - TWEAKV7 .req v23 - - .align 4 -.Lror64_8_table: - .octa 0x080f0e0d0c0b0a090007060504030201 -.Lror32_8_table: - .octa 0x0c0f0e0d080b0a090407060500030201 -.Lrol64_8_table: - .octa 0x0e0d0c0b0a09080f0605040302010007 -.Lrol32_8_table: - .octa 0x0e0d0c0f0a09080b0605040702010003 -.Lgf128mul_table: - .octa 0x00000000000000870000000000000001 -.Lgf64mul_table: - .octa 0x0000000000000000000000002d361b00 - -/* - * _speck_round_128bytes() - Speck encryption round on 128 bytes at a time - * - * Do one Speck encryption round on the 128 bytes (8 blocks for Speck128, 16 for - * Speck64) stored in X0-X3 and Y0-Y3, using the round key stored in all lanes - * of ROUND_KEY. 'n' is the lane size: 64 for Speck128, or 32 for Speck64. - * 'lanes' is the lane specifier: "2d" for Speck128 or "4s" for Speck64. - */ -.macro _speck_round_128bytes n, lanes - - // x = ror(x, 8) - tbl X_0.16b, {X_0.16b}, ROTATE_TABLE.16b - tbl X_1.16b, {X_1.16b}, ROTATE_TABLE.16b - tbl X_2.16b, {X_2.16b}, ROTATE_TABLE.16b - tbl X_3.16b, {X_3.16b}, ROTATE_TABLE.16b - - // x += y - add X_0.\lanes, X_0.\lanes, Y_0.\lanes - add X_1.\lanes, X_1.\lanes, Y_1.\lanes - add X_2.\lanes, X_2.\lanes, Y_2.\lanes - add X_3.\lanes, X_3.\lanes, Y_3.\lanes - - // x ^= k - eor X_0.16b, X_0.16b, ROUND_KEY.16b - eor X_1.16b, X_1.16b, ROUND_KEY.16b - eor X_2.16b, X_2.16b, ROUND_KEY.16b - eor X_3.16b, X_3.16b, ROUND_KEY.16b - - // y = rol(y, 3) - shl TMP0.\lanes, Y_0.\lanes, #3 - shl TMP1.\lanes, Y_1.\lanes, #3 - shl TMP2.\lanes, Y_2.\lanes, #3 - shl TMP3.\lanes, Y_3.\lanes, #3 - sri TMP0.\lanes, Y_0.\lanes, #(\n - 3) - sri TMP1.\lanes, Y_1.\lanes, #(\n - 3) - sri TMP2.\lanes, Y_2.\lanes, #(\n - 3) - sri TMP3.\lanes, Y_3.\lanes, #(\n - 3) - - // y ^= x - eor Y_0.16b, TMP0.16b, X_0.16b - eor Y_1.16b, TMP1.16b, X_1.16b - eor Y_2.16b, TMP2.16b, X_2.16b - eor Y_3.16b, TMP3.16b, X_3.16b -.endm - -/* - * _speck_unround_128bytes() - Speck decryption round on 128 bytes at a time - * - * This is the inverse of _speck_round_128bytes(). - */ -.macro _speck_unround_128bytes n, lanes - - // y ^= x - eor TMP0.16b, Y_0.16b, X_0.16b - eor TMP1.16b, Y_1.16b, X_1.16b - eor TMP2.16b, Y_2.16b, X_2.16b - eor TMP3.16b, Y_3.16b, X_3.16b - - // y = ror(y, 3) - ushr Y_0.\lanes, TMP0.\lanes, #3 - ushr Y_1.\lanes, TMP1.\lanes, #3 - ushr Y_2.\lanes, TMP2.\lanes, #3 - ushr Y_3.\lanes, TMP3.\lanes, #3 - sli Y_0.\lanes, TMP0.\lanes, #(\n - 3) - sli Y_1.\lanes, TMP1.\lanes, #(\n - 3) - sli Y_2.\lanes, TMP2.\lanes, #(\n - 3) - sli Y_3.\lanes, TMP3.\lanes, #(\n - 3) - - // x ^= k - eor X_0.16b, X_0.16b, ROUND_KEY.16b - eor X_1.16b, X_1.16b, ROUND_KEY.16b - eor X_2.16b, X_2.16b, ROUND_KEY.16b - eor X_3.16b, X_3.16b, ROUND_KEY.16b - - // x -= y - sub X_0.\lanes, X_0.\lanes, Y_0.\lanes - sub X_1.\lanes, X_1.\lanes, Y_1.\lanes - sub X_2.\lanes, X_2.\lanes, Y_2.\lanes - sub X_3.\lanes, X_3.\lanes, Y_3.\lanes - - // x = rol(x, 8) - tbl X_0.16b, {X_0.16b}, ROTATE_TABLE.16b - tbl X_1.16b, {X_1.16b}, ROTATE_TABLE.16b - tbl X_2.16b, {X_2.16b}, ROTATE_TABLE.16b - tbl X_3.16b, {X_3.16b}, ROTATE_TABLE.16b -.endm - -.macro _next_xts_tweak next, cur, tmp, n -.if \n == 64 - /* - * Calculate the next tweak by multiplying the current one by x, - * modulo p(x) = x^128 + x^7 + x^2 + x + 1. - */ - sshr \tmp\().2d, \cur\().2d, #63 - and \tmp\().16b, \tmp\().16b, GFMUL_TABLE.16b - shl \next\().2d, \cur\().2d, #1 - ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8 - eor \next\().16b, \next\().16b, \tmp\().16b -.else - /* - * Calculate the next two tweaks by multiplying the current ones by x^2, - * modulo p(x) = x^64 + x^4 + x^3 + x + 1. - */ - ushr \tmp\().2d, \cur\().2d, #62 - shl \next\().2d, \cur\().2d, #2 - tbl \tmp\().16b, {GFMUL_TABLE.16b}, \tmp\().16b - eor \next\().16b, \next\().16b, \tmp\().16b -.endif -.endm - -/* - * _speck_xts_crypt() - Speck-XTS encryption/decryption - * - * Encrypt or decrypt NBYTES bytes of data from the SRC buffer to the DST buffer - * using Speck-XTS, specifically the variant with a block size of '2n' and round - * count given by NROUNDS. The expanded round keys are given in ROUND_KEYS, and - * the current XTS tweak value is given in TWEAK. It's assumed that NBYTES is a - * nonzero multiple of 128. - */ -.macro _speck_xts_crypt n, lanes, decrypting - - /* - * If decrypting, modify the ROUND_KEYS parameter to point to the last - * round key rather than the first, since for decryption the round keys - * are used in reverse order. - */ -.if \decrypting - mov NROUNDS, NROUNDS /* zero the high 32 bits */ -.if \n == 64 - add ROUND_KEYS, ROUND_KEYS, NROUNDS_X, lsl #3 - sub ROUND_KEYS, ROUND_KEYS, #8 -.else - add ROUND_KEYS, ROUND_KEYS, NROUNDS_X, lsl #2 - sub ROUND_KEYS, ROUND_KEYS, #4 -.endif -.endif - - // Load the index vector for tbl-based 8-bit rotates -.if \decrypting - ldr ROTATE_TABLE_Q, .Lrol\n\()_8_table -.else - ldr ROTATE_TABLE_Q, .Lror\n\()_8_table -.endif - - // One-time XTS preparation -.if \n == 64 - // Load first tweak - ld1 {TWEAKV0.16b}, [TWEAK] - - // Load GF(2^128) multiplication table - ldr GFMUL_TABLE_Q, .Lgf128mul_table -.else - // Load first tweak - ld1 {TWEAKV0.8b}, [TWEAK] - - // Load GF(2^64) multiplication table - ldr GFMUL_TABLE_Q, .Lgf64mul_table - - // Calculate second tweak, packing it together with the first - ushr TMP0.2d, TWEAKV0.2d, #63 - shl TMP1.2d, TWEAKV0.2d, #1 - tbl TMP0.8b, {GFMUL_TABLE.16b}, TMP0.8b - eor TMP0.8b, TMP0.8b, TMP1.8b - mov TWEAKV0.d[1], TMP0.d[0] -.endif - -.Lnext_128bytes_\@: - - // Calculate XTS tweaks for next 128 bytes - _next_xts_tweak TWEAKV1, TWEAKV0, TMP0, \n - _next_xts_tweak TWEAKV2, TWEAKV1, TMP0, \n - _next_xts_tweak TWEAKV3, TWEAKV2, TMP0, \n - _next_xts_tweak TWEAKV4, TWEAKV3, TMP0, \n - _next_xts_tweak TWEAKV5, TWEAKV4, TMP0, \n - _next_xts_tweak TWEAKV6, TWEAKV5, TMP0, \n - _next_xts_tweak TWEAKV7, TWEAKV6, TMP0, \n - _next_xts_tweak TWEAKV_NEXT, TWEAKV7, TMP0, \n - - // Load the next source blocks into {X,Y}[0-3] - ld1 {X_0.16b-Y_1.16b}, [SRC], #64 - ld1 {X_2.16b-Y_3.16b}, [SRC], #64 - - // XOR the source blocks with their XTS tweaks - eor TMP0.16b, X_0.16b, TWEAKV0.16b - eor Y_0.16b, Y_0.16b, TWEAKV1.16b - eor TMP1.16b, X_1.16b, TWEAKV2.16b - eor Y_1.16b, Y_1.16b, TWEAKV3.16b - eor TMP2.16b, X_2.16b, TWEAKV4.16b - eor Y_2.16b, Y_2.16b, TWEAKV5.16b - eor TMP3.16b, X_3.16b, TWEAKV6.16b - eor Y_3.16b, Y_3.16b, TWEAKV7.16b - - /* - * De-interleave the 'x' and 'y' elements of each block, i.e. make it so - * that the X[0-3] registers contain only the second halves of blocks, - * and the Y[0-3] registers contain only the first halves of blocks. - * (Speck uses the order (y, x) rather than the more intuitive (x, y).) - */ - uzp2 X_0.\lanes, TMP0.\lanes, Y_0.\lanes - uzp1 Y_0.\lanes, TMP0.\lanes, Y_0.\lanes - uzp2 X_1.\lanes, TMP1.\lanes, Y_1.\lanes - uzp1 Y_1.\lanes, TMP1.\lanes, Y_1.\lanes - uzp2 X_2.\lanes, TMP2.\lanes, Y_2.\lanes - uzp1 Y_2.\lanes, TMP2.\lanes, Y_2.\lanes - uzp2 X_3.\lanes, TMP3.\lanes, Y_3.\lanes - uzp1 Y_3.\lanes, TMP3.\lanes, Y_3.\lanes - - // Do the cipher rounds - mov x6, ROUND_KEYS - mov w7, NROUNDS -.Lnext_round_\@: -.if \decrypting - ld1r {ROUND_KEY.\lanes}, [x6] - sub x6, x6, #( \n / 8 ) - _speck_unround_128bytes \n, \lanes -.else - ld1r {ROUND_KEY.\lanes}, [x6], #( \n / 8 ) - _speck_round_128bytes \n, \lanes -.endif - subs w7, w7, #1 - bne .Lnext_round_\@ - - // Re-interleave the 'x' and 'y' elements of each block - zip1 TMP0.\lanes, Y_0.\lanes, X_0.\lanes - zip2 Y_0.\lanes, Y_0.\lanes, X_0.\lanes - zip1 TMP1.\lanes, Y_1.\lanes, X_1.\lanes - zip2 Y_1.\lanes, Y_1.\lanes, X_1.\lanes - zip1 TMP2.\lanes, Y_2.\lanes, X_2.\lanes - zip2 Y_2.\lanes, Y_2.\lanes, X_2.\lanes - zip1 TMP3.\lanes, Y_3.\lanes, X_3.\lanes - zip2 Y_3.\lanes, Y_3.\lanes, X_3.\lanes - - // XOR the encrypted/decrypted blocks with the tweaks calculated earlier - eor X_0.16b, TMP0.16b, TWEAKV0.16b - eor Y_0.16b, Y_0.16b, TWEAKV1.16b - eor X_1.16b, TMP1.16b, TWEAKV2.16b - eor Y_1.16b, Y_1.16b, TWEAKV3.16b - eor X_2.16b, TMP2.16b, TWEAKV4.16b - eor Y_2.16b, Y_2.16b, TWEAKV5.16b - eor X_3.16b, TMP3.16b, TWEAKV6.16b - eor Y_3.16b, Y_3.16b, TWEAKV7.16b - mov TWEAKV0.16b, TWEAKV_NEXT.16b - - // Store the ciphertext in the destination buffer - st1 {X_0.16b-Y_1.16b}, [DST], #64 - st1 {X_2.16b-Y_3.16b}, [DST], #64 - - // Continue if there are more 128-byte chunks remaining - subs NBYTES, NBYTES, #128 - bne .Lnext_128bytes_\@ - - // Store the next tweak and return -.if \n == 64 - st1 {TWEAKV_NEXT.16b}, [TWEAK] -.else - st1 {TWEAKV_NEXT.8b}, [TWEAK] -.endif - ret -.endm - -ENTRY(speck128_xts_encrypt_neon) - _speck_xts_crypt n=64, lanes=2d, decrypting=0 -ENDPROC(speck128_xts_encrypt_neon) - -ENTRY(speck128_xts_decrypt_neon) - _speck_xts_crypt n=64, lanes=2d, decrypting=1 -ENDPROC(speck128_xts_decrypt_neon) - -ENTRY(speck64_xts_encrypt_neon) - _speck_xts_crypt n=32, lanes=4s, decrypting=0 -ENDPROC(speck64_xts_encrypt_neon) - -ENTRY(speck64_xts_decrypt_neon) - _speck_xts_crypt n=32, lanes=4s, decrypting=1 -ENDPROC(speck64_xts_decrypt_neon) diff --git a/arch/arm64/crypto/speck-neon-glue.c b/arch/arm64/crypto/speck-neon-glue.c deleted file mode 100644 index c7d18569d408..000000000000 --- a/arch/arm64/crypto/speck-neon-glue.c +++ /dev/null @@ -1,308 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS - * (64-bit version; based on the 32-bit version) - * - * Copyright (c) 2018 Google, Inc - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* The assembly functions only handle multiples of 128 bytes */ -#define SPECK_NEON_CHUNK_SIZE 128 - -/* Speck128 */ - -struct speck128_xts_tfm_ctx { - struct speck128_tfm_ctx main_key; - struct speck128_tfm_ctx tweak_key; -}; - -asmlinkage void speck128_xts_encrypt_neon(const u64 *round_keys, int nrounds, - void *dst, const void *src, - unsigned int nbytes, void *tweak); - -asmlinkage void speck128_xts_decrypt_neon(const u64 *round_keys, int nrounds, - void *dst, const void *src, - unsigned int nbytes, void *tweak); - -typedef void (*speck128_crypt_one_t)(const struct speck128_tfm_ctx *, - u8 *, const u8 *); -typedef void (*speck128_xts_crypt_many_t)(const u64 *, int, void *, - const void *, unsigned int, void *); - -static __always_inline int -__speck128_xts_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes, - speck128_crypt_one_t crypt_one, - speck128_xts_crypt_many_t crypt_many) -{ - struct crypto_blkcipher *tfm = desc->tfm; - const struct speck128_xts_tfm_ctx *ctx = crypto_blkcipher_ctx(tfm); - struct blkcipher_walk walk; - le128 tweak; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, SPECK_NEON_CHUNK_SIZE); - - crypto_speck128_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv); - - while (walk.nbytes > 0) { - unsigned int nbytes = walk.nbytes; - u8 *dst = walk.dst.virt.addr; - const u8 *src = walk.src.virt.addr; - - if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) { - unsigned int count; - - count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE); - kernel_neon_begin(); - (*crypt_many)(ctx->main_key.round_keys, - ctx->main_key.nrounds, - dst, src, count, &tweak); - kernel_neon_end(); - dst += count; - src += count; - nbytes -= count; - } - - /* Handle any remainder with generic code */ - while (nbytes >= sizeof(tweak)) { - le128_xor((le128 *)dst, (const le128 *)src, &tweak); - (*crypt_one)(&ctx->main_key, dst, dst); - le128_xor((le128 *)dst, (const le128 *)dst, &tweak); - gf128mul_x_ble((be128 *)&tweak, (const be128 *)&tweak); - - dst += sizeof(tweak); - src += sizeof(tweak); - nbytes -= sizeof(tweak); - } - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - return err; -} - -static int speck128_xts_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, - unsigned int nbytes) -{ - return __speck128_xts_crypt(desc, dst, src, nbytes, - crypto_speck128_encrypt, - speck128_xts_encrypt_neon); -} - -static int speck128_xts_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, - unsigned int nbytes) -{ - return __speck128_xts_crypt(desc, dst, src, nbytes, - crypto_speck128_decrypt, - speck128_xts_decrypt_neon); -} - -static int speck128_xts_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct speck128_xts_tfm_ctx *ctx = crypto_tfm_ctx(tfm); - int err; - - if (keylen % 2) - return -EINVAL; - - keylen /= 2; - - err = crypto_speck128_setkey(&ctx->main_key, key, keylen); - if (err) - return err; - - return crypto_speck128_setkey(&ctx->tweak_key, key + keylen, keylen); -} - -/* Speck64 */ - -struct speck64_xts_tfm_ctx { - struct speck64_tfm_ctx main_key; - struct speck64_tfm_ctx tweak_key; -}; - -asmlinkage void speck64_xts_encrypt_neon(const u32 *round_keys, int nrounds, - void *dst, const void *src, - unsigned int nbytes, void *tweak); - -asmlinkage void speck64_xts_decrypt_neon(const u32 *round_keys, int nrounds, - void *dst, const void *src, - unsigned int nbytes, void *tweak); - -typedef void (*speck64_crypt_one_t)(const struct speck64_tfm_ctx *, - u8 *, const u8 *); -typedef void (*speck64_xts_crypt_many_t)(const u32 *, int, void *, - const void *, unsigned int, void *); - -static __always_inline int -__speck64_xts_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes, - speck64_crypt_one_t crypt_one, - speck64_xts_crypt_many_t crypt_many) -{ - struct crypto_blkcipher *tfm = desc->tfm; - const struct speck64_xts_tfm_ctx *ctx = crypto_blkcipher_ctx(tfm); - struct blkcipher_walk walk; - __le64 tweak; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, SPECK_NEON_CHUNK_SIZE); - - crypto_speck64_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv); - - while (walk.nbytes > 0) { - unsigned int nbytes = walk.nbytes; - u8 *dst = walk.dst.virt.addr; - const u8 *src = walk.src.virt.addr; - - if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) { - unsigned int count; - - count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE); - kernel_neon_begin(); - (*crypt_many)(ctx->main_key.round_keys, - ctx->main_key.nrounds, - dst, src, count, &tweak); - kernel_neon_end(); - dst += count; - src += count; - nbytes -= count; - } - - /* Handle any remainder with generic code */ - while (nbytes >= sizeof(tweak)) { - *(__le64 *)dst = *(__le64 *)src ^ tweak; - (*crypt_one)(&ctx->main_key, dst, dst); - *(__le64 *)dst ^= tweak; - tweak = cpu_to_le64((le64_to_cpu(tweak) << 1) ^ - ((tweak & cpu_to_le64(1ULL << 63)) ? - 0x1B : 0)); - dst += sizeof(tweak); - src += sizeof(tweak); - nbytes -= sizeof(tweak); - } - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - return err; -} - -static int speck64_xts_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) -{ - return __speck64_xts_crypt(desc, dst, src, nbytes, - crypto_speck64_encrypt, - speck64_xts_encrypt_neon); -} - -static int speck64_xts_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) -{ - return __speck64_xts_crypt(desc, dst, src, nbytes, - crypto_speck64_decrypt, - speck64_xts_decrypt_neon); -} - -static int speck64_xts_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct speck64_xts_tfm_ctx *ctx = crypto_tfm_ctx(tfm); - int err; - - if (keylen % 2) - return -EINVAL; - - keylen /= 2; - - err = crypto_speck64_setkey(&ctx->main_key, key, keylen); - if (err) - return err; - - return crypto_speck64_setkey(&ctx->tweak_key, key + keylen, keylen); -} - -static struct crypto_alg speck_algs[] = { - { - .cra_name = "xts(speck128)", - .cra_driver_name = "xts-speck128-neon", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = SPECK128_BLOCK_SIZE, - .cra_type = &crypto_blkcipher_type, - .cra_ctxsize = sizeof(struct speck128_xts_tfm_ctx), - .cra_alignmask = 7, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = 2 * SPECK128_128_KEY_SIZE, - .max_keysize = 2 * SPECK128_256_KEY_SIZE, - .ivsize = SPECK128_BLOCK_SIZE, - .setkey = speck128_xts_setkey, - .encrypt = speck128_xts_encrypt, - .decrypt = speck128_xts_decrypt, - } - } - }, { - .cra_name = "xts(speck64)", - .cra_driver_name = "xts-speck64-neon", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = SPECK64_BLOCK_SIZE, - .cra_type = &crypto_blkcipher_type, - .cra_ctxsize = sizeof(struct speck64_xts_tfm_ctx), - .cra_alignmask = 7, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = 2 * SPECK64_96_KEY_SIZE, - .max_keysize = 2 * SPECK64_128_KEY_SIZE, - .ivsize = SPECK64_BLOCK_SIZE, - .setkey = speck64_xts_setkey, - .encrypt = speck64_xts_encrypt, - .decrypt = speck64_xts_decrypt, - } - } - } -}; - -static int __init speck_neon_module_init(void) -{ - if (!(elf_hwcap & HWCAP_ASIMD)) - return -ENODEV; - return crypto_register_algs(speck_algs, ARRAY_SIZE(speck_algs)); -} - -static void __exit speck_neon_module_exit(void) -{ - crypto_unregister_algs(speck_algs, ARRAY_SIZE(speck_algs)); -} - -module_init(speck_neon_module_init); -module_exit(speck_neon_module_exit); - -MODULE_DESCRIPTION("Speck block cipher (NEON-accelerated)"); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Eric Biggers "); -MODULE_ALIAS_CRYPTO("xts(speck128)"); -MODULE_ALIAS_CRYPTO("xts-speck128-neon"); -MODULE_ALIAS_CRYPTO("xts(speck64)"); -MODULE_ALIAS_CRYPTO("xts-speck64-neon"); From ea2ad7b4110577437f968af5d51ec284ac463ea6 Mon Sep 17 00:00:00 2001 From: Alistair Strachan Date: Mon, 22 Oct 2018 14:13:49 -0700 Subject: [PATCH 006/189] Revert "BACKPORT, FROMGIT: crypto: speck - add test vectors for Speck64-XTS" This reverts commit de4b647c0124a928b458d7d79927a3cd0a7462e3. Bug: 116008047 Change-Id: I96897223f5daced94e3d62ae817c2de2b59b417f Signed-off-by: Alistair Strachan --- crypto/testmgr.c | 15 -- crypto/testmgr.h | 671 ----------------------------------------------- 2 files changed, 686 deletions(-) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 2329b5f16b8c..c9a965c47d37 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -3903,21 +3903,6 @@ static const struct alg_test_desc alg_test_descs[] = { } } } - }, { - .alg = "xts(speck64)", - .test = alg_test_skcipher, - .suite = { - .cipher = { - .enc = { - .vecs = speck64_xts_enc_tv_template, - .count = ARRAY_SIZE(speck64_xts_enc_tv_template) - }, - .dec = { - .vecs = speck64_xts_dec_tv_template, - .count = ARRAY_SIZE(speck64_xts_dec_tv_template) - } - } - } }, { .alg = "xts(twofish)", .test = alg_test_skcipher, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 58072e1a4def..63e104bb8d72 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -13404,677 +13404,6 @@ static struct cipher_testvec speck64_dec_tv_template[] = { }, }; -/* - * Speck64-XTS test vectors, taken from the AES-XTS test vectors with the result - * recomputed with Speck64 as the cipher, and key lengths adjusted - */ - -static struct cipher_testvec speck64_xts_enc_tv_template[] = { - { - .key = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .klen = 24, - .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .ilen = 32, - .result = "\x84\xaf\x54\x07\x19\xd4\x7c\xa6" - "\xe4\xfe\xdf\xc4\x1f\x34\xc3\xc2" - "\x80\xf5\x72\xe7\xcd\xf0\x99\x22" - "\x35\xa7\x2f\x06\xef\xdc\x51\xaa", - .rlen = 32, - }, { - .key = "\x11\x11\x11\x11\x11\x11\x11\x11" - "\x11\x11\x11\x11\x11\x11\x11\x11" - "\x22\x22\x22\x22\x22\x22\x22\x22", - .klen = 24, - .iv = "\x33\x33\x33\x33\x33\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44", - .ilen = 32, - .result = "\x12\x56\x73\xcd\x15\x87\xa8\x59" - "\xcf\x84\xae\xd9\x1c\x66\xd6\x9f" - "\xb3\x12\x69\x7e\x36\xeb\x52\xff" - "\x62\xdd\xba\x90\xb3\xe1\xee\x99", - .rlen = 32, - }, { - .key = "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8" - "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0" - "\x22\x22\x22\x22\x22\x22\x22\x22", - .klen = 24, - .iv = "\x33\x33\x33\x33\x33\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44", - .ilen = 32, - .result = "\x15\x1b\xe4\x2c\xa2\x5a\x2d\x2c" - "\x27\x36\xc0\xbf\x5d\xea\x36\x37" - "\x2d\x1a\x88\xbc\x66\xb5\xd0\x0b" - "\xa1\xbc\x19\xb2\x0f\x3b\x75\x34", - .rlen = 32, - }, { - .key = "\x27\x18\x28\x18\x28\x45\x90\x45" - "\x23\x53\x60\x28\x74\x71\x35\x26" - "\x31\x41\x59\x26\x53\x58\x97\x93", - .klen = 24, - .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" - "\x20\x21\x22\x23\x24\x25\x26\x27" - "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" - "\x30\x31\x32\x33\x34\x35\x36\x37" - "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" - "\x40\x41\x42\x43\x44\x45\x46\x47" - "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" - "\x50\x51\x52\x53\x54\x55\x56\x57" - "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" - "\x60\x61\x62\x63\x64\x65\x66\x67" - "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" - "\x70\x71\x72\x73\x74\x75\x76\x77" - "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" - "\x80\x81\x82\x83\x84\x85\x86\x87" - "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" - "\x90\x91\x92\x93\x94\x95\x96\x97" - "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" - "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" - "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" - "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" - "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" - "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" - "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" - "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" - "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" - "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" - "\xe8\xe9\xea\xeb\xec\xed\xee\xef" - "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" - "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" - "\x20\x21\x22\x23\x24\x25\x26\x27" - "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" - "\x30\x31\x32\x33\x34\x35\x36\x37" - "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" - "\x40\x41\x42\x43\x44\x45\x46\x47" - "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" - "\x50\x51\x52\x53\x54\x55\x56\x57" - "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" - "\x60\x61\x62\x63\x64\x65\x66\x67" - "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" - "\x70\x71\x72\x73\x74\x75\x76\x77" - "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" - "\x80\x81\x82\x83\x84\x85\x86\x87" - "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" - "\x90\x91\x92\x93\x94\x95\x96\x97" - "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" - "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" - "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" - "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" - "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" - "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" - "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" - "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" - "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" - "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" - "\xe8\xe9\xea\xeb\xec\xed\xee\xef" - "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", - .ilen = 512, - .result = "\xaf\xa1\x81\xa6\x32\xbb\x15\x8e" - "\xf8\x95\x2e\xd3\xe6\xee\x7e\x09" - "\x0c\x1a\xf5\x02\x97\x8b\xe3\xb3" - "\x11\xc7\x39\x96\xd0\x95\xf4\x56" - "\xf4\xdd\x03\x38\x01\x44\x2c\xcf" - "\x88\xae\x8e\x3c\xcd\xe7\xaa\x66" - "\xfe\x3d\xc6\xfb\x01\x23\x51\x43" - "\xd5\xd2\x13\x86\x94\x34\xe9\x62" - "\xf9\x89\xe3\xd1\x7b\xbe\xf8\xef" - "\x76\x35\x04\x3f\xdb\x23\x9d\x0b" - "\x85\x42\xb9\x02\xd6\xcc\xdb\x96" - "\xa7\x6b\x27\xb6\xd4\x45\x8f\x7d" - "\xae\xd2\x04\xd5\xda\xc1\x7e\x24" - "\x8c\x73\xbe\x48\x7e\xcf\x65\x28" - "\x29\xe5\xbe\x54\x30\xcb\x46\x95" - "\x4f\x2e\x8a\x36\xc8\x27\xc5\xbe" - "\xd0\x1a\xaf\xab\x26\xcd\x9e\x69" - "\xa1\x09\x95\x71\x26\xe9\xc4\xdf" - "\xe6\x31\xc3\x46\xda\xaf\x0b\x41" - "\x1f\xab\xb1\x8e\xd6\xfc\x0b\xb3" - "\x82\xc0\x37\x27\xfc\x91\xa7\x05" - "\xfb\xc5\xdc\x2b\x74\x96\x48\x43" - "\x5d\x9c\x19\x0f\x60\x63\x3a\x1f" - "\x6f\xf0\x03\xbe\x4d\xfd\xc8\x4a" - "\xc6\xa4\x81\x6d\xc3\x12\x2a\x5c" - "\x07\xff\xf3\x72\x74\x48\xb5\x40" - "\x50\xb5\xdd\x90\x43\x31\x18\x15" - "\x7b\xf2\xa6\xdb\x83\xc8\x4b\x4a" - "\x29\x93\x90\x8b\xda\x07\xf0\x35" - "\x6d\x90\x88\x09\x4e\x83\xf5\x5b" - "\x94\x12\xbb\x33\x27\x1d\x3f\x23" - "\x51\xa8\x7c\x07\xa2\xae\x77\xa6" - "\x50\xfd\xcc\xc0\x4f\x80\x7a\x9f" - "\x66\xdd\xcd\x75\x24\x8b\x33\xf7" - "\x20\xdb\x83\x9b\x4f\x11\x63\x6e" - "\xcf\x37\xef\xc9\x11\x01\x5c\x45" - "\x32\x99\x7c\x3c\x9e\x42\x89\xe3" - "\x70\x6d\x15\x9f\xb1\xe6\xb6\x05" - "\xfe\x0c\xb9\x49\x2d\x90\x6d\xcc" - "\x5d\x3f\xc1\xfe\x89\x0a\x2e\x2d" - "\xa0\xa8\x89\x3b\x73\x39\xa5\x94" - "\x4c\xa4\xa6\xbb\xa7\x14\x46\x89" - "\x10\xff\xaf\xef\xca\xdd\x4f\x80" - "\xb3\xdf\x3b\xab\xd4\xe5\x5a\xc7" - "\x33\xca\x00\x8b\x8b\x3f\xea\xec" - "\x68\x8a\xc2\x6d\xfd\xd4\x67\x0f" - "\x22\x31\xe1\x0e\xfe\x5a\x04\xd5" - "\x64\xa3\xf1\x1a\x76\x28\xcc\x35" - "\x36\xa7\x0a\x74\xf7\x1c\x44\x9b" - "\xc7\x1b\x53\x17\x02\xea\xd1\xad" - "\x13\x51\x73\xc0\xa0\xb2\x05\x32" - "\xa8\xa2\x37\x2e\xe1\x7a\x3a\x19" - "\x26\xb4\x6c\x62\x5d\xb3\x1a\x1d" - "\x59\xda\xee\x1a\x22\x18\xda\x0d" - "\x88\x0f\x55\x8b\x72\x62\xfd\xc1" - "\x69\x13\xcd\x0d\x5f\xc1\x09\x52" - "\xee\xd6\xe3\x84\x4d\xee\xf6\x88" - "\xaf\x83\xdc\x76\xf4\xc0\x93\x3f" - "\x4a\x75\x2f\xb0\x0b\x3e\xc4\x54" - "\x7d\x69\x8d\x00\x62\x77\x0d\x14" - "\xbe\x7c\xa6\x7d\xc5\x24\x4f\xf3" - "\x50\xf7\x5f\xf4\xc2\xca\x41\x97" - "\x37\xbe\x75\x74\xcd\xf0\x75\x6e" - "\x25\x23\x94\xbd\xda\x8d\xb0\xd4", - .rlen = 512, - }, { - .key = "\x27\x18\x28\x18\x28\x45\x90\x45" - "\x23\x53\x60\x28\x74\x71\x35\x26" - "\x62\x49\x77\x57\x24\x70\x93\x69" - "\x99\x59\x57\x49\x66\x96\x76\x27", - .klen = 32, - .iv = "\xff\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" - "\x20\x21\x22\x23\x24\x25\x26\x27" - "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" - "\x30\x31\x32\x33\x34\x35\x36\x37" - "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" - "\x40\x41\x42\x43\x44\x45\x46\x47" - "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" - "\x50\x51\x52\x53\x54\x55\x56\x57" - "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" - "\x60\x61\x62\x63\x64\x65\x66\x67" - "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" - "\x70\x71\x72\x73\x74\x75\x76\x77" - "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" - "\x80\x81\x82\x83\x84\x85\x86\x87" - "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" - "\x90\x91\x92\x93\x94\x95\x96\x97" - "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" - "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" - "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" - "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" - "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" - "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" - "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" - "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" - "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" - "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" - "\xe8\xe9\xea\xeb\xec\xed\xee\xef" - "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" - "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" - "\x20\x21\x22\x23\x24\x25\x26\x27" - "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" - "\x30\x31\x32\x33\x34\x35\x36\x37" - "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" - "\x40\x41\x42\x43\x44\x45\x46\x47" - "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" - "\x50\x51\x52\x53\x54\x55\x56\x57" - "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" - "\x60\x61\x62\x63\x64\x65\x66\x67" - "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" - "\x70\x71\x72\x73\x74\x75\x76\x77" - "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" - "\x80\x81\x82\x83\x84\x85\x86\x87" - "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" - "\x90\x91\x92\x93\x94\x95\x96\x97" - "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" - "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" - "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" - "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" - "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" - "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" - "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" - "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" - "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" - "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" - "\xe8\xe9\xea\xeb\xec\xed\xee\xef" - "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", - .ilen = 512, - .result = "\x55\xed\x71\xd3\x02\x8e\x15\x3b" - "\xc6\x71\x29\x2d\x3e\x89\x9f\x59" - "\x68\x6a\xcc\x8a\x56\x97\xf3\x95" - "\x4e\x51\x08\xda\x2a\xf8\x6f\x3c" - "\x78\x16\xea\x80\xdb\x33\x75\x94" - "\xf9\x29\xc4\x2b\x76\x75\x97\xc7" - "\xf2\x98\x2c\xf9\xff\xc8\xd5\x2b" - "\x18\xf1\xaf\xcf\x7c\xc5\x0b\xee" - "\xad\x3c\x76\x7c\xe6\x27\xa2\x2a" - "\xe4\x66\xe1\xab\xa2\x39\xfc\x7c" - "\xf5\xec\x32\x74\xa3\xb8\x03\x88" - "\x52\xfc\x2e\x56\x3f\xa1\xf0\x9f" - "\x84\x5e\x46\xed\x20\x89\xb6\x44" - "\x8d\xd0\xed\x54\x47\x16\xbe\x95" - "\x8a\xb3\x6b\x72\xc4\x32\x52\x13" - "\x1b\xb0\x82\xbe\xac\xf9\x70\xa6" - "\x44\x18\xdd\x8c\x6e\xca\x6e\x45" - "\x8f\x1e\x10\x07\x57\x25\x98\x7b" - "\x17\x8c\x78\xdd\x80\xa7\xd9\xd8" - "\x63\xaf\xb9\x67\x57\xfd\xbc\xdb" - "\x44\xe9\xc5\x65\xd1\xc7\x3b\xff" - "\x20\xa0\x80\x1a\xc3\x9a\xad\x5e" - "\x5d\x3b\xd3\x07\xd9\xf5\xfd\x3d" - "\x4a\x8b\xa8\xd2\x6e\x7a\x51\x65" - "\x6c\x8e\x95\xe0\x45\xc9\x5f\x4a" - "\x09\x3c\x3d\x71\x7f\x0c\x84\x2a" - "\xc8\x48\x52\x1a\xc2\xd5\xd6\x78" - "\x92\x1e\xa0\x90\x2e\xea\xf0\xf3" - "\xdc\x0f\xb1\xaf\x0d\x9b\x06\x2e" - "\x35\x10\x30\x82\x0d\xe7\xc5\x9b" - "\xde\x44\x18\xbd\x9f\xd1\x45\xa9" - "\x7b\x7a\x4a\xad\x35\x65\x27\xca" - "\xb2\xc3\xd4\x9b\x71\x86\x70\xee" - "\xf1\x89\x3b\x85\x4b\x5b\xaa\xaf" - "\xfc\x42\xc8\x31\x59\xbe\x16\x60" - "\x4f\xf9\xfa\x12\xea\xd0\xa7\x14" - "\xf0\x7a\xf3\xd5\x8d\xbd\x81\xef" - "\x52\x7f\x29\x51\x94\x20\x67\x3c" - "\xd1\xaf\x77\x9f\x22\x5a\x4e\x63" - "\xe7\xff\x73\x25\xd1\xdd\x96\x8a" - "\x98\x52\x6d\xf3\xac\x3e\xf2\x18" - "\x6d\xf6\x0a\x29\xa6\x34\x3d\xed" - "\xe3\x27\x0d\x9d\x0a\x02\x44\x7e" - "\x5a\x7e\x67\x0f\x0a\x9e\xd6\xad" - "\x91\xe6\x4d\x81\x8c\x5c\x59\xaa" - "\xfb\xeb\x56\x53\xd2\x7d\x4c\x81" - "\x65\x53\x0f\x41\x11\xbd\x98\x99" - "\xf9\xc6\xfa\x51\x2e\xa3\xdd\x8d" - "\x84\x98\xf9\x34\xed\x33\x2a\x1f" - "\x82\xed\xc1\x73\x98\xd3\x02\xdc" - "\xe6\xc2\x33\x1d\xa2\xb4\xca\x76" - "\x63\x51\x34\x9d\x96\x12\xae\xce" - "\x83\xc9\x76\x5e\xa4\x1b\x53\x37" - "\x17\xd5\xc0\x80\x1d\x62\xf8\x3d" - "\x54\x27\x74\xbb\x10\x86\x57\x46" - "\x68\xe1\xed\x14\xe7\x9d\xfc\x84" - "\x47\xbc\xc2\xf8\x19\x4b\x99\xcf" - "\x7a\xe9\xc4\xb8\x8c\x82\x72\x4d" - "\x7b\x4f\x38\x55\x36\x71\x64\xc1" - "\xfc\x5c\x75\x52\x33\x02\x18\xf8" - "\x17\xe1\x2b\xc2\x43\x39\xbd\x76" - "\x9b\x63\x76\x32\x2f\x19\x72\x10" - "\x9f\x21\x0c\xf1\x66\x50\x7f\xa5" - "\x0d\x1f\x46\xe0\xba\xd3\x2f\x3c", - .rlen = 512, - .also_non_np = 1, - .np = 3, - .tap = { 512 - 20, 4, 16 }, - } -}; - -static struct cipher_testvec speck64_xts_dec_tv_template[] = { - { - .key = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .klen = 24, - .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\x84\xaf\x54\x07\x19\xd4\x7c\xa6" - "\xe4\xfe\xdf\xc4\x1f\x34\xc3\xc2" - "\x80\xf5\x72\xe7\xcd\xf0\x99\x22" - "\x35\xa7\x2f\x06\xef\xdc\x51\xaa", - .ilen = 32, - .result = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .rlen = 32, - }, { - .key = "\x11\x11\x11\x11\x11\x11\x11\x11" - "\x11\x11\x11\x11\x11\x11\x11\x11" - "\x22\x22\x22\x22\x22\x22\x22\x22", - .klen = 24, - .iv = "\x33\x33\x33\x33\x33\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\x12\x56\x73\xcd\x15\x87\xa8\x59" - "\xcf\x84\xae\xd9\x1c\x66\xd6\x9f" - "\xb3\x12\x69\x7e\x36\xeb\x52\xff" - "\x62\xdd\xba\x90\xb3\xe1\xee\x99", - .ilen = 32, - .result = "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44", - .rlen = 32, - }, { - .key = "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8" - "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0" - "\x22\x22\x22\x22\x22\x22\x22\x22", - .klen = 24, - .iv = "\x33\x33\x33\x33\x33\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\x15\x1b\xe4\x2c\xa2\x5a\x2d\x2c" - "\x27\x36\xc0\xbf\x5d\xea\x36\x37" - "\x2d\x1a\x88\xbc\x66\xb5\xd0\x0b" - "\xa1\xbc\x19\xb2\x0f\x3b\x75\x34", - .ilen = 32, - .result = "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44", - .rlen = 32, - }, { - .key = "\x27\x18\x28\x18\x28\x45\x90\x45" - "\x23\x53\x60\x28\x74\x71\x35\x26" - "\x31\x41\x59\x26\x53\x58\x97\x93", - .klen = 24, - .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\xaf\xa1\x81\xa6\x32\xbb\x15\x8e" - "\xf8\x95\x2e\xd3\xe6\xee\x7e\x09" - "\x0c\x1a\xf5\x02\x97\x8b\xe3\xb3" - "\x11\xc7\x39\x96\xd0\x95\xf4\x56" - "\xf4\xdd\x03\x38\x01\x44\x2c\xcf" - "\x88\xae\x8e\x3c\xcd\xe7\xaa\x66" - "\xfe\x3d\xc6\xfb\x01\x23\x51\x43" - "\xd5\xd2\x13\x86\x94\x34\xe9\x62" - "\xf9\x89\xe3\xd1\x7b\xbe\xf8\xef" - "\x76\x35\x04\x3f\xdb\x23\x9d\x0b" - "\x85\x42\xb9\x02\xd6\xcc\xdb\x96" - "\xa7\x6b\x27\xb6\xd4\x45\x8f\x7d" - "\xae\xd2\x04\xd5\xda\xc1\x7e\x24" - "\x8c\x73\xbe\x48\x7e\xcf\x65\x28" - "\x29\xe5\xbe\x54\x30\xcb\x46\x95" - "\x4f\x2e\x8a\x36\xc8\x27\xc5\xbe" - "\xd0\x1a\xaf\xab\x26\xcd\x9e\x69" - "\xa1\x09\x95\x71\x26\xe9\xc4\xdf" - "\xe6\x31\xc3\x46\xda\xaf\x0b\x41" - "\x1f\xab\xb1\x8e\xd6\xfc\x0b\xb3" - "\x82\xc0\x37\x27\xfc\x91\xa7\x05" - "\xfb\xc5\xdc\x2b\x74\x96\x48\x43" - "\x5d\x9c\x19\x0f\x60\x63\x3a\x1f" - "\x6f\xf0\x03\xbe\x4d\xfd\xc8\x4a" - "\xc6\xa4\x81\x6d\xc3\x12\x2a\x5c" - "\x07\xff\xf3\x72\x74\x48\xb5\x40" - "\x50\xb5\xdd\x90\x43\x31\x18\x15" - "\x7b\xf2\xa6\xdb\x83\xc8\x4b\x4a" - "\x29\x93\x90\x8b\xda\x07\xf0\x35" - "\x6d\x90\x88\x09\x4e\x83\xf5\x5b" - "\x94\x12\xbb\x33\x27\x1d\x3f\x23" - "\x51\xa8\x7c\x07\xa2\xae\x77\xa6" - "\x50\xfd\xcc\xc0\x4f\x80\x7a\x9f" - "\x66\xdd\xcd\x75\x24\x8b\x33\xf7" - "\x20\xdb\x83\x9b\x4f\x11\x63\x6e" - "\xcf\x37\xef\xc9\x11\x01\x5c\x45" - "\x32\x99\x7c\x3c\x9e\x42\x89\xe3" - "\x70\x6d\x15\x9f\xb1\xe6\xb6\x05" - "\xfe\x0c\xb9\x49\x2d\x90\x6d\xcc" - "\x5d\x3f\xc1\xfe\x89\x0a\x2e\x2d" - "\xa0\xa8\x89\x3b\x73\x39\xa5\x94" - "\x4c\xa4\xa6\xbb\xa7\x14\x46\x89" - "\x10\xff\xaf\xef\xca\xdd\x4f\x80" - "\xb3\xdf\x3b\xab\xd4\xe5\x5a\xc7" - "\x33\xca\x00\x8b\x8b\x3f\xea\xec" - "\x68\x8a\xc2\x6d\xfd\xd4\x67\x0f" - "\x22\x31\xe1\x0e\xfe\x5a\x04\xd5" - "\x64\xa3\xf1\x1a\x76\x28\xcc\x35" - "\x36\xa7\x0a\x74\xf7\x1c\x44\x9b" - "\xc7\x1b\x53\x17\x02\xea\xd1\xad" - "\x13\x51\x73\xc0\xa0\xb2\x05\x32" - "\xa8\xa2\x37\x2e\xe1\x7a\x3a\x19" - "\x26\xb4\x6c\x62\x5d\xb3\x1a\x1d" - "\x59\xda\xee\x1a\x22\x18\xda\x0d" - "\x88\x0f\x55\x8b\x72\x62\xfd\xc1" - "\x69\x13\xcd\x0d\x5f\xc1\x09\x52" - "\xee\xd6\xe3\x84\x4d\xee\xf6\x88" - "\xaf\x83\xdc\x76\xf4\xc0\x93\x3f" - "\x4a\x75\x2f\xb0\x0b\x3e\xc4\x54" - "\x7d\x69\x8d\x00\x62\x77\x0d\x14" - "\xbe\x7c\xa6\x7d\xc5\x24\x4f\xf3" - "\x50\xf7\x5f\xf4\xc2\xca\x41\x97" - "\x37\xbe\x75\x74\xcd\xf0\x75\x6e" - "\x25\x23\x94\xbd\xda\x8d\xb0\xd4", - .ilen = 512, - .result = "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" - "\x20\x21\x22\x23\x24\x25\x26\x27" - "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" - "\x30\x31\x32\x33\x34\x35\x36\x37" - "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" - "\x40\x41\x42\x43\x44\x45\x46\x47" - "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" - "\x50\x51\x52\x53\x54\x55\x56\x57" - "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" - "\x60\x61\x62\x63\x64\x65\x66\x67" - "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" - "\x70\x71\x72\x73\x74\x75\x76\x77" - "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" - "\x80\x81\x82\x83\x84\x85\x86\x87" - "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" - "\x90\x91\x92\x93\x94\x95\x96\x97" - "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" - "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" - "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" - "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" - "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" - "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" - "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" - "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" - "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" - "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" - "\xe8\xe9\xea\xeb\xec\xed\xee\xef" - "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" - "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" - "\x20\x21\x22\x23\x24\x25\x26\x27" - "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" - "\x30\x31\x32\x33\x34\x35\x36\x37" - "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" - "\x40\x41\x42\x43\x44\x45\x46\x47" - "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" - "\x50\x51\x52\x53\x54\x55\x56\x57" - "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" - "\x60\x61\x62\x63\x64\x65\x66\x67" - "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" - "\x70\x71\x72\x73\x74\x75\x76\x77" - "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" - "\x80\x81\x82\x83\x84\x85\x86\x87" - "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" - "\x90\x91\x92\x93\x94\x95\x96\x97" - "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" - "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" - "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" - "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" - "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" - "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" - "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" - "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" - "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" - "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" - "\xe8\xe9\xea\xeb\xec\xed\xee\xef" - "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", - .rlen = 512, - }, { - .key = "\x27\x18\x28\x18\x28\x45\x90\x45" - "\x23\x53\x60\x28\x74\x71\x35\x26" - "\x62\x49\x77\x57\x24\x70\x93\x69" - "\x99\x59\x57\x49\x66\x96\x76\x27", - .klen = 32, - .iv = "\xff\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\x55\xed\x71\xd3\x02\x8e\x15\x3b" - "\xc6\x71\x29\x2d\x3e\x89\x9f\x59" - "\x68\x6a\xcc\x8a\x56\x97\xf3\x95" - "\x4e\x51\x08\xda\x2a\xf8\x6f\x3c" - "\x78\x16\xea\x80\xdb\x33\x75\x94" - "\xf9\x29\xc4\x2b\x76\x75\x97\xc7" - "\xf2\x98\x2c\xf9\xff\xc8\xd5\x2b" - "\x18\xf1\xaf\xcf\x7c\xc5\x0b\xee" - "\xad\x3c\x76\x7c\xe6\x27\xa2\x2a" - "\xe4\x66\xe1\xab\xa2\x39\xfc\x7c" - "\xf5\xec\x32\x74\xa3\xb8\x03\x88" - "\x52\xfc\x2e\x56\x3f\xa1\xf0\x9f" - "\x84\x5e\x46\xed\x20\x89\xb6\x44" - "\x8d\xd0\xed\x54\x47\x16\xbe\x95" - "\x8a\xb3\x6b\x72\xc4\x32\x52\x13" - "\x1b\xb0\x82\xbe\xac\xf9\x70\xa6" - "\x44\x18\xdd\x8c\x6e\xca\x6e\x45" - "\x8f\x1e\x10\x07\x57\x25\x98\x7b" - "\x17\x8c\x78\xdd\x80\xa7\xd9\xd8" - "\x63\xaf\xb9\x67\x57\xfd\xbc\xdb" - "\x44\xe9\xc5\x65\xd1\xc7\x3b\xff" - "\x20\xa0\x80\x1a\xc3\x9a\xad\x5e" - "\x5d\x3b\xd3\x07\xd9\xf5\xfd\x3d" - "\x4a\x8b\xa8\xd2\x6e\x7a\x51\x65" - "\x6c\x8e\x95\xe0\x45\xc9\x5f\x4a" - "\x09\x3c\x3d\x71\x7f\x0c\x84\x2a" - "\xc8\x48\x52\x1a\xc2\xd5\xd6\x78" - "\x92\x1e\xa0\x90\x2e\xea\xf0\xf3" - "\xdc\x0f\xb1\xaf\x0d\x9b\x06\x2e" - "\x35\x10\x30\x82\x0d\xe7\xc5\x9b" - "\xde\x44\x18\xbd\x9f\xd1\x45\xa9" - "\x7b\x7a\x4a\xad\x35\x65\x27\xca" - "\xb2\xc3\xd4\x9b\x71\x86\x70\xee" - "\xf1\x89\x3b\x85\x4b\x5b\xaa\xaf" - "\xfc\x42\xc8\x31\x59\xbe\x16\x60" - "\x4f\xf9\xfa\x12\xea\xd0\xa7\x14" - "\xf0\x7a\xf3\xd5\x8d\xbd\x81\xef" - "\x52\x7f\x29\x51\x94\x20\x67\x3c" - "\xd1\xaf\x77\x9f\x22\x5a\x4e\x63" - "\xe7\xff\x73\x25\xd1\xdd\x96\x8a" - "\x98\x52\x6d\xf3\xac\x3e\xf2\x18" - "\x6d\xf6\x0a\x29\xa6\x34\x3d\xed" - "\xe3\x27\x0d\x9d\x0a\x02\x44\x7e" - "\x5a\x7e\x67\x0f\x0a\x9e\xd6\xad" - "\x91\xe6\x4d\x81\x8c\x5c\x59\xaa" - "\xfb\xeb\x56\x53\xd2\x7d\x4c\x81" - "\x65\x53\x0f\x41\x11\xbd\x98\x99" - "\xf9\xc6\xfa\x51\x2e\xa3\xdd\x8d" - "\x84\x98\xf9\x34\xed\x33\x2a\x1f" - "\x82\xed\xc1\x73\x98\xd3\x02\xdc" - "\xe6\xc2\x33\x1d\xa2\xb4\xca\x76" - "\x63\x51\x34\x9d\x96\x12\xae\xce" - "\x83\xc9\x76\x5e\xa4\x1b\x53\x37" - "\x17\xd5\xc0\x80\x1d\x62\xf8\x3d" - "\x54\x27\x74\xbb\x10\x86\x57\x46" - "\x68\xe1\xed\x14\xe7\x9d\xfc\x84" - "\x47\xbc\xc2\xf8\x19\x4b\x99\xcf" - "\x7a\xe9\xc4\xb8\x8c\x82\x72\x4d" - "\x7b\x4f\x38\x55\x36\x71\x64\xc1" - "\xfc\x5c\x75\x52\x33\x02\x18\xf8" - "\x17\xe1\x2b\xc2\x43\x39\xbd\x76" - "\x9b\x63\x76\x32\x2f\x19\x72\x10" - "\x9f\x21\x0c\xf1\x66\x50\x7f\xa5" - "\x0d\x1f\x46\xe0\xba\xd3\x2f\x3c", - .ilen = 512, - .result = "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" - "\x20\x21\x22\x23\x24\x25\x26\x27" - "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" - "\x30\x31\x32\x33\x34\x35\x36\x37" - "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" - "\x40\x41\x42\x43\x44\x45\x46\x47" - "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" - "\x50\x51\x52\x53\x54\x55\x56\x57" - "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" - "\x60\x61\x62\x63\x64\x65\x66\x67" - "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" - "\x70\x71\x72\x73\x74\x75\x76\x77" - "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" - "\x80\x81\x82\x83\x84\x85\x86\x87" - "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" - "\x90\x91\x92\x93\x94\x95\x96\x97" - "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" - "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" - "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" - "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" - "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" - "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" - "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" - "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" - "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" - "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" - "\xe8\xe9\xea\xeb\xec\xed\xee\xef" - "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" - "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" - "\x20\x21\x22\x23\x24\x25\x26\x27" - "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" - "\x30\x31\x32\x33\x34\x35\x36\x37" - "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" - "\x40\x41\x42\x43\x44\x45\x46\x47" - "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" - "\x50\x51\x52\x53\x54\x55\x56\x57" - "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" - "\x60\x61\x62\x63\x64\x65\x66\x67" - "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" - "\x70\x71\x72\x73\x74\x75\x76\x77" - "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" - "\x80\x81\x82\x83\x84\x85\x86\x87" - "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" - "\x90\x91\x92\x93\x94\x95\x96\x97" - "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" - "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" - "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" - "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" - "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" - "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" - "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" - "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" - "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" - "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" - "\xe8\xe9\xea\xeb\xec\xed\xee\xef" - "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", - .rlen = 512, - .also_non_np = 1, - .np = 3, - .tap = { 512 - 20, 4, 16 }, - } -}; - /* Cast6 test vectors from RFC 2612 */ #define CAST6_ENC_TEST_VECTORS 4 #define CAST6_DEC_TEST_VECTORS 4 From 559e4a2616afa0ff683a3cbf8ec2ee662cfadeb7 Mon Sep 17 00:00:00 2001 From: Alistair Strachan Date: Mon, 22 Oct 2018 14:14:18 -0700 Subject: [PATCH 007/189] Revert "BACKPORT, FROMGIT: crypto: speck - add test vectors for Speck128-XTS" This reverts commit b37c1244dd074b052247290a8c8b1eb9a02b4f84. Bug: 116008047 Change-Id: Id68b333c66c89bc69e61bdd96d21fa6bc6dbf3b8 Signed-off-by: Alistair Strachan --- crypto/testmgr.c | 15 -- crypto/testmgr.h | 687 ----------------------------------------------- 2 files changed, 702 deletions(-) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index c9a965c47d37..4760b564dc17 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -3888,21 +3888,6 @@ static const struct alg_test_desc alg_test_descs[] = { } } } - }, { - .alg = "xts(speck128)", - .test = alg_test_skcipher, - .suite = { - .cipher = { - .enc = { - .vecs = speck128_xts_enc_tv_template, - .count = ARRAY_SIZE(speck128_xts_enc_tv_template) - }, - .dec = { - .vecs = speck128_xts_dec_tv_template, - .count = ARRAY_SIZE(speck128_xts_dec_tv_template) - } - } - } }, { .alg = "xts(twofish)", .test = alg_test_skcipher, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 63e104bb8d72..9b59a21a89e8 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -12677,693 +12677,6 @@ static struct cipher_testvec speck128_dec_tv_template[] = { }, }; -/* - * Speck128-XTS test vectors, taken from the AES-XTS test vectors with the - * result recomputed with Speck128 as the cipher - */ - -static struct cipher_testvec speck128_xts_enc_tv_template[] = { - { - .key = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .klen = 32, - .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .ilen = 32, - .result = "\xbe\xa0\xe7\x03\xd7\xfe\xab\x62" - "\x3b\x99\x4a\x64\x74\x77\xac\xed" - "\xd8\xf4\xa6\xcf\xae\xb9\x07\x42" - "\x51\xd9\xb6\x1d\xe0\x5e\xbc\x54", - .rlen = 32, - }, { - .key = "\x11\x11\x11\x11\x11\x11\x11\x11" - "\x11\x11\x11\x11\x11\x11\x11\x11" - "\x22\x22\x22\x22\x22\x22\x22\x22" - "\x22\x22\x22\x22\x22\x22\x22\x22", - .klen = 32, - .iv = "\x33\x33\x33\x33\x33\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44", - .ilen = 32, - .result = "\xfb\x53\x81\x75\x6f\x9f\x34\xad" - "\x7e\x01\xed\x7b\xcc\xda\x4e\x4a" - "\xd4\x84\xa4\x53\xd5\x88\x73\x1b" - "\xfd\xcb\xae\x0d\xf3\x04\xee\xe6", - .rlen = 32, - }, { - .key = "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8" - "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0" - "\x22\x22\x22\x22\x22\x22\x22\x22" - "\x22\x22\x22\x22\x22\x22\x22\x22", - .klen = 32, - .iv = "\x33\x33\x33\x33\x33\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44", - .ilen = 32, - .result = "\x21\x52\x84\x15\xd1\xf7\x21\x55" - "\xd9\x75\x4a\xd3\xc5\xdb\x9f\x7d" - "\xda\x63\xb2\xf1\x82\xb0\x89\x59" - "\x86\xd4\xaa\xaa\xdd\xff\x4f\x92", - .rlen = 32, - }, { - .key = "\x27\x18\x28\x18\x28\x45\x90\x45" - "\x23\x53\x60\x28\x74\x71\x35\x26" - "\x31\x41\x59\x26\x53\x58\x97\x93" - "\x23\x84\x62\x64\x33\x83\x27\x95", - .klen = 32, - .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" - "\x20\x21\x22\x23\x24\x25\x26\x27" - "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" - "\x30\x31\x32\x33\x34\x35\x36\x37" - "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" - "\x40\x41\x42\x43\x44\x45\x46\x47" - "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" - "\x50\x51\x52\x53\x54\x55\x56\x57" - "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" - "\x60\x61\x62\x63\x64\x65\x66\x67" - "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" - "\x70\x71\x72\x73\x74\x75\x76\x77" - "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" - "\x80\x81\x82\x83\x84\x85\x86\x87" - "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" - "\x90\x91\x92\x93\x94\x95\x96\x97" - "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" - "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" - "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" - "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" - "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" - "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" - "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" - "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" - "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" - "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" - "\xe8\xe9\xea\xeb\xec\xed\xee\xef" - "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" - "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" - "\x20\x21\x22\x23\x24\x25\x26\x27" - "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" - "\x30\x31\x32\x33\x34\x35\x36\x37" - "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" - "\x40\x41\x42\x43\x44\x45\x46\x47" - "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" - "\x50\x51\x52\x53\x54\x55\x56\x57" - "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" - "\x60\x61\x62\x63\x64\x65\x66\x67" - "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" - "\x70\x71\x72\x73\x74\x75\x76\x77" - "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" - "\x80\x81\x82\x83\x84\x85\x86\x87" - "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" - "\x90\x91\x92\x93\x94\x95\x96\x97" - "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" - "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" - "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" - "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" - "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" - "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" - "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" - "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" - "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" - "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" - "\xe8\xe9\xea\xeb\xec\xed\xee\xef" - "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", - .ilen = 512, - .result = "\x57\xb5\xf8\x71\x6e\x6d\xdd\x82" - "\x53\xd0\xed\x2d\x30\xc1\x20\xef" - "\x70\x67\x5e\xff\x09\x70\xbb\xc1" - "\x3a\x7b\x48\x26\xd9\x0b\xf4\x48" - "\xbe\xce\xb1\xc7\xb2\x67\xc4\xa7" - "\x76\xf8\x36\x30\xb7\xb4\x9a\xd9" - "\xf5\x9d\xd0\x7b\xc1\x06\x96\x44" - "\x19\xc5\x58\x84\x63\xb9\x12\x68" - "\x68\xc7\xaa\x18\x98\xf2\x1f\x5c" - "\x39\xa6\xd8\x32\x2b\xc3\x51\xfd" - "\x74\x79\x2e\xb4\x44\xd7\x69\xc4" - "\xfc\x29\xe6\xed\x26\x1e\xa6\x9d" - "\x1c\xbe\x00\x0e\x7f\x3a\xca\xfb" - "\x6d\x13\x65\xa0\xf9\x31\x12\xe2" - "\x26\xd1\xec\x2b\x0a\x8b\x59\x99" - "\xa7\x49\xa0\x0e\x09\x33\x85\x50" - "\xc3\x23\xca\x7a\xdd\x13\x45\x5f" - "\xde\x4c\xa7\xcb\x00\x8a\x66\x6f" - "\xa2\xb6\xb1\x2e\xe1\xa0\x18\xf6" - "\xad\xf3\xbd\xeb\xc7\xef\x55\x4f" - "\x79\x91\x8d\x36\x13\x7b\xd0\x4a" - "\x6c\x39\xfb\x53\xb8\x6f\x02\x51" - "\xa5\x20\xac\x24\x1c\x73\x59\x73" - "\x58\x61\x3a\x87\x58\xb3\x20\x56" - "\x39\x06\x2b\x4d\xd3\x20\x2b\x89" - "\x3f\xa2\xf0\x96\xeb\x7f\xa4\xcd" - "\x11\xae\xbd\xcb\x3a\xb4\xd9\x91" - "\x09\x35\x71\x50\x65\xac\x92\xe3" - "\x7b\x32\xc0\x7a\xdd\xd4\xc3\x92" - "\x6f\xeb\x79\xde\x6f\xd3\x25\xc9" - "\xcd\x63\xf5\x1e\x7a\x3b\x26\x9d" - "\x77\x04\x80\xa9\xbf\x38\xb5\xbd" - "\xb8\x05\x07\xbd\xfd\xab\x7b\xf8" - "\x2a\x26\xcc\x49\x14\x6d\x55\x01" - "\x06\x94\xd8\xb2\x2d\x53\x83\x1b" - "\x8f\xd4\xdd\x57\x12\x7e\x18\xba" - "\x8e\xe2\x4d\x80\xef\x7e\x6b\x9d" - "\x24\xa9\x60\xa4\x97\x85\x86\x2a" - "\x01\x00\x09\xf1\xcb\x4a\x24\x1c" - "\xd8\xf6\xe6\x5b\xe7\x5d\xf2\xc4" - "\x97\x1c\x10\xc6\x4d\x66\x4f\x98" - "\x87\x30\xac\xd5\xea\x73\x49\x10" - "\x80\xea\xe5\x5f\x4d\x5f\x03\x33" - "\x66\x02\x35\x3d\x60\x06\x36\x4f" - "\x14\x1c\xd8\x07\x1f\x78\xd0\xf8" - "\x4f\x6c\x62\x7c\x15\xa5\x7c\x28" - "\x7c\xcc\xeb\x1f\xd1\x07\x90\x93" - "\x7e\xc2\xa8\x3a\x80\xc0\xf5\x30" - "\xcc\x75\xcf\x16\x26\xa9\x26\x3b" - "\xe7\x68\x2f\x15\x21\x5b\xe4\x00" - "\xbd\x48\x50\xcd\x75\x70\xc4\x62" - "\xbb\x41\xfb\x89\x4a\x88\x3b\x3b" - "\x51\x66\x02\x69\x04\x97\x36\xd4" - "\x75\xae\x0b\xa3\x42\xf8\xca\x79" - "\x8f\x93\xe9\xcc\x38\xbd\xd6\xd2" - "\xf9\x70\x4e\xc3\x6a\x8e\x25\xbd" - "\xea\x15\x5a\xa0\x85\x7e\x81\x0d" - "\x03\xe7\x05\x39\xf5\x05\x26\xee" - "\xec\xaa\x1f\x3d\xc9\x98\x76\x01" - "\x2c\xf4\xfc\xa3\x88\x77\x38\xc4" - "\x50\x65\x50\x6d\x04\x1f\xdf\x5a" - "\xaa\xf2\x01\xa9\xc1\x8d\xee\xca" - "\x47\x26\xef\x39\xb8\xb4\xf2\xd1" - "\xd6\xbb\x1b\x2a\xc1\x34\x14\xcf", - .rlen = 512, - }, { - .key = "\x27\x18\x28\x18\x28\x45\x90\x45" - "\x23\x53\x60\x28\x74\x71\x35\x26" - "\x62\x49\x77\x57\x24\x70\x93\x69" - "\x99\x59\x57\x49\x66\x96\x76\x27" - "\x31\x41\x59\x26\x53\x58\x97\x93" - "\x23\x84\x62\x64\x33\x83\x27\x95" - "\x02\x88\x41\x97\x16\x93\x99\x37" - "\x51\x05\x82\x09\x74\x94\x45\x92", - .klen = 64, - .iv = "\xff\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" - "\x20\x21\x22\x23\x24\x25\x26\x27" - "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" - "\x30\x31\x32\x33\x34\x35\x36\x37" - "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" - "\x40\x41\x42\x43\x44\x45\x46\x47" - "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" - "\x50\x51\x52\x53\x54\x55\x56\x57" - "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" - "\x60\x61\x62\x63\x64\x65\x66\x67" - "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" - "\x70\x71\x72\x73\x74\x75\x76\x77" - "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" - "\x80\x81\x82\x83\x84\x85\x86\x87" - "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" - "\x90\x91\x92\x93\x94\x95\x96\x97" - "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" - "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" - "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" - "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" - "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" - "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" - "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" - "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" - "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" - "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" - "\xe8\xe9\xea\xeb\xec\xed\xee\xef" - "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" - "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" - "\x20\x21\x22\x23\x24\x25\x26\x27" - "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" - "\x30\x31\x32\x33\x34\x35\x36\x37" - "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" - "\x40\x41\x42\x43\x44\x45\x46\x47" - "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" - "\x50\x51\x52\x53\x54\x55\x56\x57" - "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" - "\x60\x61\x62\x63\x64\x65\x66\x67" - "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" - "\x70\x71\x72\x73\x74\x75\x76\x77" - "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" - "\x80\x81\x82\x83\x84\x85\x86\x87" - "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" - "\x90\x91\x92\x93\x94\x95\x96\x97" - "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" - "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" - "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" - "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" - "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" - "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" - "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" - "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" - "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" - "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" - "\xe8\xe9\xea\xeb\xec\xed\xee\xef" - "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", - .ilen = 512, - .result = "\xc5\x85\x2a\x4b\x73\xe4\xf6\xf1" - "\x7e\xf9\xf6\xe9\xa3\x73\x36\xcb" - "\xaa\xb6\x22\xb0\x24\x6e\x3d\x73" - "\x92\x99\xde\xd3\x76\xed\xcd\x63" - "\x64\x3a\x22\x57\xc1\x43\x49\xd4" - "\x79\x36\x31\x19\x62\xae\x10\x7e" - "\x7d\xcf\x7a\xe2\x6b\xce\x27\xfa" - "\xdc\x3d\xd9\x83\xd3\x42\x4c\xe0" - "\x1b\xd6\x1d\x1a\x6f\xd2\x03\x00" - "\xfc\x81\x99\x8a\x14\x62\xf5\x7e" - "\x0d\xe7\x12\xe8\x17\x9d\x0b\xec" - "\xe2\xf7\xc9\xa7\x63\xd1\x79\xb6" - "\x62\x62\x37\xfe\x0a\x4c\x4a\x37" - "\x70\xc7\x5e\x96\x5f\xbc\x8e\x9e" - "\x85\x3c\x4f\x26\x64\x85\xbc\x68" - "\xb0\xe0\x86\x5e\x26\x41\xce\x11" - "\x50\xda\x97\x14\xe9\x9e\xc7\x6d" - "\x3b\xdc\x43\xde\x2b\x27\x69\x7d" - "\xfc\xb0\x28\xbd\x8f\xb1\xc6\x31" - "\x14\x4d\xf0\x74\x37\xfd\x07\x25" - "\x96\x55\xe5\xfc\x9e\x27\x2a\x74" - "\x1b\x83\x4d\x15\x83\xac\x57\xa0" - "\xac\xa5\xd0\x38\xef\x19\x56\x53" - "\x25\x4b\xfc\xce\x04\x23\xe5\x6b" - "\xf6\xc6\x6c\x32\x0b\xb3\x12\xc5" - "\xed\x22\x34\x1c\x5d\xed\x17\x06" - "\x36\xa3\xe6\x77\xb9\x97\x46\xb8" - "\xe9\x3f\x7e\xc7\xbc\x13\x5c\xdc" - "\x6e\x3f\x04\x5e\xd1\x59\xa5\x82" - "\x35\x91\x3d\x1b\xe4\x97\x9f\x92" - "\x1c\x5e\x5f\x6f\x41\xd4\x62\xa1" - "\x8d\x39\xfc\x42\xfb\x38\x80\xb9" - "\x0a\xe3\xcc\x6a\x93\xd9\x7a\xb1" - "\xe9\x69\xaf\x0a\x6b\x75\x38\xa7" - "\xa1\xbf\xf7\xda\x95\x93\x4b\x78" - "\x19\xf5\x94\xf9\xd2\x00\x33\x37" - "\xcf\xf5\x9e\x9c\xf3\xcc\xa6\xee" - "\x42\xb2\x9e\x2c\x5f\x48\x23\x26" - "\x15\x25\x17\x03\x3d\xfe\x2c\xfc" - "\xeb\xba\xda\xe0\x00\x05\xb6\xa6" - "\x07\xb3\xe8\x36\x5b\xec\x5b\xbf" - "\xd6\x5b\x00\x74\xc6\x97\xf1\x6a" - "\x49\xa1\xc3\xfa\x10\x52\xb9\x14" - "\xad\xb7\x73\xf8\x78\x12\xc8\x59" - "\x17\x80\x4c\x57\x39\xf1\x6d\x80" - "\x25\x77\x0f\x5e\x7d\xf0\xaf\x21" - "\xec\xce\xb7\xc8\x02\x8a\xed\x53" - "\x2c\x25\x68\x2e\x1f\x85\x5e\x67" - "\xd1\x07\x7a\x3a\x89\x08\xe0\x34" - "\xdc\xdb\x26\xb4\x6b\x77\xfc\x40" - "\x31\x15\x72\xa0\xf0\x73\xd9\x3b" - "\xd5\xdb\xfe\xfc\x8f\xa9\x44\xa2" - "\x09\x9f\xc6\x33\xe5\xe2\x88\xe8" - "\xf3\xf0\x1a\xf4\xce\x12\x0f\xd6" - "\xf7\x36\xe6\xa4\xf4\x7a\x10\x58" - "\xcc\x1f\x48\x49\x65\x47\x75\xe9" - "\x28\xe1\x65\x7b\xf2\xc4\xb5\x07" - "\xf2\xec\x76\xd8\x8f\x09\xf3\x16" - "\xa1\x51\x89\x3b\xeb\x96\x42\xac" - "\x65\xe0\x67\x63\x29\xdc\xb4\x7d" - "\xf2\x41\x51\x6a\xcb\xde\x3c\xfb" - "\x66\x8d\x13\xca\xe0\x59\x2a\x00" - "\xc9\x53\x4c\xe6\x9e\xe2\x73\xd5" - "\x67\x19\xb2\xbd\x9a\x63\xd7\x5c", - .rlen = 512, - .also_non_np = 1, - .np = 3, - .tap = { 512 - 20, 4, 16 }, - } -}; - -static struct cipher_testvec speck128_xts_dec_tv_template[] = { - { - .key = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .klen = 32, - .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\xbe\xa0\xe7\x03\xd7\xfe\xab\x62" - "\x3b\x99\x4a\x64\x74\x77\xac\xed" - "\xd8\xf4\xa6\xcf\xae\xb9\x07\x42" - "\x51\xd9\xb6\x1d\xe0\x5e\xbc\x54", - .ilen = 32, - .result = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .rlen = 32, - }, { - .key = "\x11\x11\x11\x11\x11\x11\x11\x11" - "\x11\x11\x11\x11\x11\x11\x11\x11" - "\x22\x22\x22\x22\x22\x22\x22\x22" - "\x22\x22\x22\x22\x22\x22\x22\x22", - .klen = 32, - .iv = "\x33\x33\x33\x33\x33\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\xfb\x53\x81\x75\x6f\x9f\x34\xad" - "\x7e\x01\xed\x7b\xcc\xda\x4e\x4a" - "\xd4\x84\xa4\x53\xd5\x88\x73\x1b" - "\xfd\xcb\xae\x0d\xf3\x04\xee\xe6", - .ilen = 32, - .result = "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44", - .rlen = 32, - }, { - .key = "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8" - "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0" - "\x22\x22\x22\x22\x22\x22\x22\x22" - "\x22\x22\x22\x22\x22\x22\x22\x22", - .klen = 32, - .iv = "\x33\x33\x33\x33\x33\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\x21\x52\x84\x15\xd1\xf7\x21\x55" - "\xd9\x75\x4a\xd3\xc5\xdb\x9f\x7d" - "\xda\x63\xb2\xf1\x82\xb0\x89\x59" - "\x86\xd4\xaa\xaa\xdd\xff\x4f\x92", - .ilen = 32, - .result = "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44" - "\x44\x44\x44\x44\x44\x44\x44\x44", - .rlen = 32, - }, { - .key = "\x27\x18\x28\x18\x28\x45\x90\x45" - "\x23\x53\x60\x28\x74\x71\x35\x26" - "\x31\x41\x59\x26\x53\x58\x97\x93" - "\x23\x84\x62\x64\x33\x83\x27\x95", - .klen = 32, - .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\x57\xb5\xf8\x71\x6e\x6d\xdd\x82" - "\x53\xd0\xed\x2d\x30\xc1\x20\xef" - "\x70\x67\x5e\xff\x09\x70\xbb\xc1" - "\x3a\x7b\x48\x26\xd9\x0b\xf4\x48" - "\xbe\xce\xb1\xc7\xb2\x67\xc4\xa7" - "\x76\xf8\x36\x30\xb7\xb4\x9a\xd9" - "\xf5\x9d\xd0\x7b\xc1\x06\x96\x44" - "\x19\xc5\x58\x84\x63\xb9\x12\x68" - "\x68\xc7\xaa\x18\x98\xf2\x1f\x5c" - "\x39\xa6\xd8\x32\x2b\xc3\x51\xfd" - "\x74\x79\x2e\xb4\x44\xd7\x69\xc4" - "\xfc\x29\xe6\xed\x26\x1e\xa6\x9d" - "\x1c\xbe\x00\x0e\x7f\x3a\xca\xfb" - "\x6d\x13\x65\xa0\xf9\x31\x12\xe2" - "\x26\xd1\xec\x2b\x0a\x8b\x59\x99" - "\xa7\x49\xa0\x0e\x09\x33\x85\x50" - "\xc3\x23\xca\x7a\xdd\x13\x45\x5f" - "\xde\x4c\xa7\xcb\x00\x8a\x66\x6f" - "\xa2\xb6\xb1\x2e\xe1\xa0\x18\xf6" - "\xad\xf3\xbd\xeb\xc7\xef\x55\x4f" - "\x79\x91\x8d\x36\x13\x7b\xd0\x4a" - "\x6c\x39\xfb\x53\xb8\x6f\x02\x51" - "\xa5\x20\xac\x24\x1c\x73\x59\x73" - "\x58\x61\x3a\x87\x58\xb3\x20\x56" - "\x39\x06\x2b\x4d\xd3\x20\x2b\x89" - "\x3f\xa2\xf0\x96\xeb\x7f\xa4\xcd" - "\x11\xae\xbd\xcb\x3a\xb4\xd9\x91" - "\x09\x35\x71\x50\x65\xac\x92\xe3" - "\x7b\x32\xc0\x7a\xdd\xd4\xc3\x92" - "\x6f\xeb\x79\xde\x6f\xd3\x25\xc9" - "\xcd\x63\xf5\x1e\x7a\x3b\x26\x9d" - "\x77\x04\x80\xa9\xbf\x38\xb5\xbd" - "\xb8\x05\x07\xbd\xfd\xab\x7b\xf8" - "\x2a\x26\xcc\x49\x14\x6d\x55\x01" - "\x06\x94\xd8\xb2\x2d\x53\x83\x1b" - "\x8f\xd4\xdd\x57\x12\x7e\x18\xba" - "\x8e\xe2\x4d\x80\xef\x7e\x6b\x9d" - "\x24\xa9\x60\xa4\x97\x85\x86\x2a" - "\x01\x00\x09\xf1\xcb\x4a\x24\x1c" - "\xd8\xf6\xe6\x5b\xe7\x5d\xf2\xc4" - "\x97\x1c\x10\xc6\x4d\x66\x4f\x98" - "\x87\x30\xac\xd5\xea\x73\x49\x10" - "\x80\xea\xe5\x5f\x4d\x5f\x03\x33" - "\x66\x02\x35\x3d\x60\x06\x36\x4f" - "\x14\x1c\xd8\x07\x1f\x78\xd0\xf8" - "\x4f\x6c\x62\x7c\x15\xa5\x7c\x28" - "\x7c\xcc\xeb\x1f\xd1\x07\x90\x93" - "\x7e\xc2\xa8\x3a\x80\xc0\xf5\x30" - "\xcc\x75\xcf\x16\x26\xa9\x26\x3b" - "\xe7\x68\x2f\x15\x21\x5b\xe4\x00" - "\xbd\x48\x50\xcd\x75\x70\xc4\x62" - "\xbb\x41\xfb\x89\x4a\x88\x3b\x3b" - "\x51\x66\x02\x69\x04\x97\x36\xd4" - "\x75\xae\x0b\xa3\x42\xf8\xca\x79" - "\x8f\x93\xe9\xcc\x38\xbd\xd6\xd2" - "\xf9\x70\x4e\xc3\x6a\x8e\x25\xbd" - "\xea\x15\x5a\xa0\x85\x7e\x81\x0d" - "\x03\xe7\x05\x39\xf5\x05\x26\xee" - "\xec\xaa\x1f\x3d\xc9\x98\x76\x01" - "\x2c\xf4\xfc\xa3\x88\x77\x38\xc4" - "\x50\x65\x50\x6d\x04\x1f\xdf\x5a" - "\xaa\xf2\x01\xa9\xc1\x8d\xee\xca" - "\x47\x26\xef\x39\xb8\xb4\xf2\xd1" - "\xd6\xbb\x1b\x2a\xc1\x34\x14\xcf", - .ilen = 512, - .result = "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" - "\x20\x21\x22\x23\x24\x25\x26\x27" - "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" - "\x30\x31\x32\x33\x34\x35\x36\x37" - "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" - "\x40\x41\x42\x43\x44\x45\x46\x47" - "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" - "\x50\x51\x52\x53\x54\x55\x56\x57" - "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" - "\x60\x61\x62\x63\x64\x65\x66\x67" - "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" - "\x70\x71\x72\x73\x74\x75\x76\x77" - "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" - "\x80\x81\x82\x83\x84\x85\x86\x87" - "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" - "\x90\x91\x92\x93\x94\x95\x96\x97" - "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" - "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" - "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" - "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" - "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" - "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" - "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" - "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" - "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" - "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" - "\xe8\xe9\xea\xeb\xec\xed\xee\xef" - "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" - "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" - "\x20\x21\x22\x23\x24\x25\x26\x27" - "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" - "\x30\x31\x32\x33\x34\x35\x36\x37" - "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" - "\x40\x41\x42\x43\x44\x45\x46\x47" - "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" - "\x50\x51\x52\x53\x54\x55\x56\x57" - "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" - "\x60\x61\x62\x63\x64\x65\x66\x67" - "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" - "\x70\x71\x72\x73\x74\x75\x76\x77" - "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" - "\x80\x81\x82\x83\x84\x85\x86\x87" - "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" - "\x90\x91\x92\x93\x94\x95\x96\x97" - "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" - "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" - "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" - "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" - "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" - "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" - "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" - "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" - "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" - "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" - "\xe8\xe9\xea\xeb\xec\xed\xee\xef" - "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", - .rlen = 512, - }, { - .key = "\x27\x18\x28\x18\x28\x45\x90\x45" - "\x23\x53\x60\x28\x74\x71\x35\x26" - "\x62\x49\x77\x57\x24\x70\x93\x69" - "\x99\x59\x57\x49\x66\x96\x76\x27" - "\x31\x41\x59\x26\x53\x58\x97\x93" - "\x23\x84\x62\x64\x33\x83\x27\x95" - "\x02\x88\x41\x97\x16\x93\x99\x37" - "\x51\x05\x82\x09\x74\x94\x45\x92", - .klen = 64, - .iv = "\xff\x00\x00\x00\x00\x00\x00\x00" - "\x00\x00\x00\x00\x00\x00\x00\x00", - .input = "\xc5\x85\x2a\x4b\x73\xe4\xf6\xf1" - "\x7e\xf9\xf6\xe9\xa3\x73\x36\xcb" - "\xaa\xb6\x22\xb0\x24\x6e\x3d\x73" - "\x92\x99\xde\xd3\x76\xed\xcd\x63" - "\x64\x3a\x22\x57\xc1\x43\x49\xd4" - "\x79\x36\x31\x19\x62\xae\x10\x7e" - "\x7d\xcf\x7a\xe2\x6b\xce\x27\xfa" - "\xdc\x3d\xd9\x83\xd3\x42\x4c\xe0" - "\x1b\xd6\x1d\x1a\x6f\xd2\x03\x00" - "\xfc\x81\x99\x8a\x14\x62\xf5\x7e" - "\x0d\xe7\x12\xe8\x17\x9d\x0b\xec" - "\xe2\xf7\xc9\xa7\x63\xd1\x79\xb6" - "\x62\x62\x37\xfe\x0a\x4c\x4a\x37" - "\x70\xc7\x5e\x96\x5f\xbc\x8e\x9e" - "\x85\x3c\x4f\x26\x64\x85\xbc\x68" - "\xb0\xe0\x86\x5e\x26\x41\xce\x11" - "\x50\xda\x97\x14\xe9\x9e\xc7\x6d" - "\x3b\xdc\x43\xde\x2b\x27\x69\x7d" - "\xfc\xb0\x28\xbd\x8f\xb1\xc6\x31" - "\x14\x4d\xf0\x74\x37\xfd\x07\x25" - "\x96\x55\xe5\xfc\x9e\x27\x2a\x74" - "\x1b\x83\x4d\x15\x83\xac\x57\xa0" - "\xac\xa5\xd0\x38\xef\x19\x56\x53" - "\x25\x4b\xfc\xce\x04\x23\xe5\x6b" - "\xf6\xc6\x6c\x32\x0b\xb3\x12\xc5" - "\xed\x22\x34\x1c\x5d\xed\x17\x06" - "\x36\xa3\xe6\x77\xb9\x97\x46\xb8" - "\xe9\x3f\x7e\xc7\xbc\x13\x5c\xdc" - "\x6e\x3f\x04\x5e\xd1\x59\xa5\x82" - "\x35\x91\x3d\x1b\xe4\x97\x9f\x92" - "\x1c\x5e\x5f\x6f\x41\xd4\x62\xa1" - "\x8d\x39\xfc\x42\xfb\x38\x80\xb9" - "\x0a\xe3\xcc\x6a\x93\xd9\x7a\xb1" - "\xe9\x69\xaf\x0a\x6b\x75\x38\xa7" - "\xa1\xbf\xf7\xda\x95\x93\x4b\x78" - "\x19\xf5\x94\xf9\xd2\x00\x33\x37" - "\xcf\xf5\x9e\x9c\xf3\xcc\xa6\xee" - "\x42\xb2\x9e\x2c\x5f\x48\x23\x26" - "\x15\x25\x17\x03\x3d\xfe\x2c\xfc" - "\xeb\xba\xda\xe0\x00\x05\xb6\xa6" - "\x07\xb3\xe8\x36\x5b\xec\x5b\xbf" - "\xd6\x5b\x00\x74\xc6\x97\xf1\x6a" - "\x49\xa1\xc3\xfa\x10\x52\xb9\x14" - "\xad\xb7\x73\xf8\x78\x12\xc8\x59" - "\x17\x80\x4c\x57\x39\xf1\x6d\x80" - "\x25\x77\x0f\x5e\x7d\xf0\xaf\x21" - "\xec\xce\xb7\xc8\x02\x8a\xed\x53" - "\x2c\x25\x68\x2e\x1f\x85\x5e\x67" - "\xd1\x07\x7a\x3a\x89\x08\xe0\x34" - "\xdc\xdb\x26\xb4\x6b\x77\xfc\x40" - "\x31\x15\x72\xa0\xf0\x73\xd9\x3b" - "\xd5\xdb\xfe\xfc\x8f\xa9\x44\xa2" - "\x09\x9f\xc6\x33\xe5\xe2\x88\xe8" - "\xf3\xf0\x1a\xf4\xce\x12\x0f\xd6" - "\xf7\x36\xe6\xa4\xf4\x7a\x10\x58" - "\xcc\x1f\x48\x49\x65\x47\x75\xe9" - "\x28\xe1\x65\x7b\xf2\xc4\xb5\x07" - "\xf2\xec\x76\xd8\x8f\x09\xf3\x16" - "\xa1\x51\x89\x3b\xeb\x96\x42\xac" - "\x65\xe0\x67\x63\x29\xdc\xb4\x7d" - "\xf2\x41\x51\x6a\xcb\xde\x3c\xfb" - "\x66\x8d\x13\xca\xe0\x59\x2a\x00" - "\xc9\x53\x4c\xe6\x9e\xe2\x73\xd5" - "\x67\x19\xb2\xbd\x9a\x63\xd7\x5c", - .ilen = 512, - .result = "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" - "\x20\x21\x22\x23\x24\x25\x26\x27" - "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" - "\x30\x31\x32\x33\x34\x35\x36\x37" - "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" - "\x40\x41\x42\x43\x44\x45\x46\x47" - "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" - "\x50\x51\x52\x53\x54\x55\x56\x57" - "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" - "\x60\x61\x62\x63\x64\x65\x66\x67" - "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" - "\x70\x71\x72\x73\x74\x75\x76\x77" - "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" - "\x80\x81\x82\x83\x84\x85\x86\x87" - "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" - "\x90\x91\x92\x93\x94\x95\x96\x97" - "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" - "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" - "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" - "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" - "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" - "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" - "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" - "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" - "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" - "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" - "\xe8\xe9\xea\xeb\xec\xed\xee\xef" - "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" - "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" - "\x20\x21\x22\x23\x24\x25\x26\x27" - "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" - "\x30\x31\x32\x33\x34\x35\x36\x37" - "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" - "\x40\x41\x42\x43\x44\x45\x46\x47" - "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" - "\x50\x51\x52\x53\x54\x55\x56\x57" - "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" - "\x60\x61\x62\x63\x64\x65\x66\x67" - "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" - "\x70\x71\x72\x73\x74\x75\x76\x77" - "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" - "\x80\x81\x82\x83\x84\x85\x86\x87" - "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" - "\x90\x91\x92\x93\x94\x95\x96\x97" - "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" - "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" - "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" - "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" - "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" - "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" - "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" - "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" - "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" - "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" - "\xe8\xe9\xea\xeb\xec\xed\xee\xef" - "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" - "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", - .rlen = 512, - .also_non_np = 1, - .np = 3, - .tap = { 512 - 20, 4, 16 }, - } -}; - static struct cipher_testvec speck64_enc_tv_template[] = { { /* Speck64/96 */ .key = "\x00\x01\x02\x03\x08\x09\x0a\x0b" From e6a822e8dd797d2605afd394f59853bb1bdd1d6e Mon Sep 17 00:00:00 2001 From: Alistair Strachan Date: Mon, 22 Oct 2018 14:14:57 -0700 Subject: [PATCH 008/189] Revert "BACKPORT, FROMGIT: crypto: arm/speck - add NEON-accelerated implementation of Speck-XTS" This reverts commit f54e3ccbe8a2be3947e4238817de3e43f1da8fe9. Bug: 116008047 Change-Id: I3c76a77dfae5894b46e39266f9f8d7c7294ab615 Signed-off-by: Alistair Strachan --- arch/arm/crypto/Kconfig | 7 - arch/arm/crypto/Makefile | 2 - arch/arm/crypto/speck-neon-core.S | 432 ------------------------------ arch/arm/crypto/speck-neon-glue.c | 314 ---------------------- 4 files changed, 755 deletions(-) delete mode 100644 arch/arm/crypto/speck-neon-core.S delete mode 100644 arch/arm/crypto/speck-neon-glue.c diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 72aa5ecd5580..27ed1b1cd1d7 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -120,11 +120,4 @@ config CRYPTO_GHASH_ARM_CE that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64) that is part of the ARMv8 Crypto Extensions -config CRYPTO_SPECK_NEON - tristate "NEON accelerated Speck cipher algorithms" - depends on KERNEL_MODE_NEON - select CRYPTO_BLKCIPHER - select CRYPTO_GF128MUL - select CRYPTO_SPECK - endif diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 1d0448a875ee..fc5150702b64 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -8,7 +8,6 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o -obj-$(CONFIG_CRYPTO_SPECK_NEON) += speck-neon.o ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o @@ -37,7 +36,6 @@ sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o -speck-neon-y := speck-neon-core.o speck-neon-glue.o quiet_cmd_perl = PERL $@ cmd_perl = $(PERL) $(<) > $(@) diff --git a/arch/arm/crypto/speck-neon-core.S b/arch/arm/crypto/speck-neon-core.S deleted file mode 100644 index 3c1e203e53b9..000000000000 --- a/arch/arm/crypto/speck-neon-core.S +++ /dev/null @@ -1,432 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS - * - * Copyright (c) 2018 Google, Inc - * - * Author: Eric Biggers - */ - -#include - - .text - .fpu neon - - // arguments - ROUND_KEYS .req r0 // const {u64,u32} *round_keys - NROUNDS .req r1 // int nrounds - DST .req r2 // void *dst - SRC .req r3 // const void *src - NBYTES .req r4 // unsigned int nbytes - TWEAK .req r5 // void *tweak - - // registers which hold the data being encrypted/decrypted - X0 .req q0 - X0_L .req d0 - X0_H .req d1 - Y0 .req q1 - Y0_H .req d3 - X1 .req q2 - X1_L .req d4 - X1_H .req d5 - Y1 .req q3 - Y1_H .req d7 - X2 .req q4 - X2_L .req d8 - X2_H .req d9 - Y2 .req q5 - Y2_H .req d11 - X3 .req q6 - X3_L .req d12 - X3_H .req d13 - Y3 .req q7 - Y3_H .req d15 - - // the round key, duplicated in all lanes - ROUND_KEY .req q8 - ROUND_KEY_L .req d16 - ROUND_KEY_H .req d17 - - // index vector for vtbl-based 8-bit rotates - ROTATE_TABLE .req d18 - - // multiplication table for updating XTS tweaks - GF128MUL_TABLE .req d19 - GF64MUL_TABLE .req d19 - - // current XTS tweak value(s) - TWEAKV .req q10 - TWEAKV_L .req d20 - TWEAKV_H .req d21 - - TMP0 .req q12 - TMP0_L .req d24 - TMP0_H .req d25 - TMP1 .req q13 - TMP2 .req q14 - TMP3 .req q15 - - .align 4 -.Lror64_8_table: - .byte 1, 2, 3, 4, 5, 6, 7, 0 -.Lror32_8_table: - .byte 1, 2, 3, 0, 5, 6, 7, 4 -.Lrol64_8_table: - .byte 7, 0, 1, 2, 3, 4, 5, 6 -.Lrol32_8_table: - .byte 3, 0, 1, 2, 7, 4, 5, 6 -.Lgf128mul_table: - .byte 0, 0x87 - .fill 14 -.Lgf64mul_table: - .byte 0, 0x1b, (0x1b << 1), (0x1b << 1) ^ 0x1b - .fill 12 - -/* - * _speck_round_128bytes() - Speck encryption round on 128 bytes at a time - * - * Do one Speck encryption round on the 128 bytes (8 blocks for Speck128, 16 for - * Speck64) stored in X0-X3 and Y0-Y3, using the round key stored in all lanes - * of ROUND_KEY. 'n' is the lane size: 64 for Speck128, or 32 for Speck64. - * - * The 8-bit rotates are implemented using vtbl instead of vshr + vsli because - * the vtbl approach is faster on some processors and the same speed on others. - */ -.macro _speck_round_128bytes n - - // x = ror(x, 8) - vtbl.8 X0_L, {X0_L}, ROTATE_TABLE - vtbl.8 X0_H, {X0_H}, ROTATE_TABLE - vtbl.8 X1_L, {X1_L}, ROTATE_TABLE - vtbl.8 X1_H, {X1_H}, ROTATE_TABLE - vtbl.8 X2_L, {X2_L}, ROTATE_TABLE - vtbl.8 X2_H, {X2_H}, ROTATE_TABLE - vtbl.8 X3_L, {X3_L}, ROTATE_TABLE - vtbl.8 X3_H, {X3_H}, ROTATE_TABLE - - // x += y - vadd.u\n X0, Y0 - vadd.u\n X1, Y1 - vadd.u\n X2, Y2 - vadd.u\n X3, Y3 - - // x ^= k - veor X0, ROUND_KEY - veor X1, ROUND_KEY - veor X2, ROUND_KEY - veor X3, ROUND_KEY - - // y = rol(y, 3) - vshl.u\n TMP0, Y0, #3 - vshl.u\n TMP1, Y1, #3 - vshl.u\n TMP2, Y2, #3 - vshl.u\n TMP3, Y3, #3 - vsri.u\n TMP0, Y0, #(\n - 3) - vsri.u\n TMP1, Y1, #(\n - 3) - vsri.u\n TMP2, Y2, #(\n - 3) - vsri.u\n TMP3, Y3, #(\n - 3) - - // y ^= x - veor Y0, TMP0, X0 - veor Y1, TMP1, X1 - veor Y2, TMP2, X2 - veor Y3, TMP3, X3 -.endm - -/* - * _speck_unround_128bytes() - Speck decryption round on 128 bytes at a time - * - * This is the inverse of _speck_round_128bytes(). - */ -.macro _speck_unround_128bytes n - - // y ^= x - veor TMP0, Y0, X0 - veor TMP1, Y1, X1 - veor TMP2, Y2, X2 - veor TMP3, Y3, X3 - - // y = ror(y, 3) - vshr.u\n Y0, TMP0, #3 - vshr.u\n Y1, TMP1, #3 - vshr.u\n Y2, TMP2, #3 - vshr.u\n Y3, TMP3, #3 - vsli.u\n Y0, TMP0, #(\n - 3) - vsli.u\n Y1, TMP1, #(\n - 3) - vsli.u\n Y2, TMP2, #(\n - 3) - vsli.u\n Y3, TMP3, #(\n - 3) - - // x ^= k - veor X0, ROUND_KEY - veor X1, ROUND_KEY - veor X2, ROUND_KEY - veor X3, ROUND_KEY - - // x -= y - vsub.u\n X0, Y0 - vsub.u\n X1, Y1 - vsub.u\n X2, Y2 - vsub.u\n X3, Y3 - - // x = rol(x, 8); - vtbl.8 X0_L, {X0_L}, ROTATE_TABLE - vtbl.8 X0_H, {X0_H}, ROTATE_TABLE - vtbl.8 X1_L, {X1_L}, ROTATE_TABLE - vtbl.8 X1_H, {X1_H}, ROTATE_TABLE - vtbl.8 X2_L, {X2_L}, ROTATE_TABLE - vtbl.8 X2_H, {X2_H}, ROTATE_TABLE - vtbl.8 X3_L, {X3_L}, ROTATE_TABLE - vtbl.8 X3_H, {X3_H}, ROTATE_TABLE -.endm - -.macro _xts128_precrypt_one dst_reg, tweak_buf, tmp - - // Load the next source block - vld1.8 {\dst_reg}, [SRC]! - - // Save the current tweak in the tweak buffer - vst1.8 {TWEAKV}, [\tweak_buf:128]! - - // XOR the next source block with the current tweak - veor \dst_reg, TWEAKV - - /* - * Calculate the next tweak by multiplying the current one by x, - * modulo p(x) = x^128 + x^7 + x^2 + x + 1. - */ - vshr.u64 \tmp, TWEAKV, #63 - vshl.u64 TWEAKV, #1 - veor TWEAKV_H, \tmp\()_L - vtbl.8 \tmp\()_H, {GF128MUL_TABLE}, \tmp\()_H - veor TWEAKV_L, \tmp\()_H -.endm - -.macro _xts64_precrypt_two dst_reg, tweak_buf, tmp - - // Load the next two source blocks - vld1.8 {\dst_reg}, [SRC]! - - // Save the current two tweaks in the tweak buffer - vst1.8 {TWEAKV}, [\tweak_buf:128]! - - // XOR the next two source blocks with the current two tweaks - veor \dst_reg, TWEAKV - - /* - * Calculate the next two tweaks by multiplying the current ones by x^2, - * modulo p(x) = x^64 + x^4 + x^3 + x + 1. - */ - vshr.u64 \tmp, TWEAKV, #62 - vshl.u64 TWEAKV, #2 - vtbl.8 \tmp\()_L, {GF64MUL_TABLE}, \tmp\()_L - vtbl.8 \tmp\()_H, {GF64MUL_TABLE}, \tmp\()_H - veor TWEAKV, \tmp -.endm - -/* - * _speck_xts_crypt() - Speck-XTS encryption/decryption - * - * Encrypt or decrypt NBYTES bytes of data from the SRC buffer to the DST buffer - * using Speck-XTS, specifically the variant with a block size of '2n' and round - * count given by NROUNDS. The expanded round keys are given in ROUND_KEYS, and - * the current XTS tweak value is given in TWEAK. It's assumed that NBYTES is a - * nonzero multiple of 128. - */ -.macro _speck_xts_crypt n, decrypting - push {r4-r7} - mov r7, sp - - /* - * The first four parameters were passed in registers r0-r3. Load the - * additional parameters, which were passed on the stack. - */ - ldr NBYTES, [sp, #16] - ldr TWEAK, [sp, #20] - - /* - * If decrypting, modify the ROUND_KEYS parameter to point to the last - * round key rather than the first, since for decryption the round keys - * are used in reverse order. - */ -.if \decrypting -.if \n == 64 - add ROUND_KEYS, ROUND_KEYS, NROUNDS, lsl #3 - sub ROUND_KEYS, #8 -.else - add ROUND_KEYS, ROUND_KEYS, NROUNDS, lsl #2 - sub ROUND_KEYS, #4 -.endif -.endif - - // Load the index vector for vtbl-based 8-bit rotates -.if \decrypting - ldr r12, =.Lrol\n\()_8_table -.else - ldr r12, =.Lror\n\()_8_table -.endif - vld1.8 {ROTATE_TABLE}, [r12:64] - - // One-time XTS preparation - - /* - * Allocate stack space to store 128 bytes worth of tweaks. For - * performance, this space is aligned to a 16-byte boundary so that we - * can use the load/store instructions that declare 16-byte alignment. - */ - sub sp, #128 - bic sp, #0xf - -.if \n == 64 - // Load first tweak - vld1.8 {TWEAKV}, [TWEAK] - - // Load GF(2^128) multiplication table - ldr r12, =.Lgf128mul_table - vld1.8 {GF128MUL_TABLE}, [r12:64] -.else - // Load first tweak - vld1.8 {TWEAKV_L}, [TWEAK] - - // Load GF(2^64) multiplication table - ldr r12, =.Lgf64mul_table - vld1.8 {GF64MUL_TABLE}, [r12:64] - - // Calculate second tweak, packing it together with the first - vshr.u64 TMP0_L, TWEAKV_L, #63 - vtbl.u8 TMP0_L, {GF64MUL_TABLE}, TMP0_L - vshl.u64 TWEAKV_H, TWEAKV_L, #1 - veor TWEAKV_H, TMP0_L -.endif - -.Lnext_128bytes_\@: - - /* - * Load the source blocks into {X,Y}[0-3], XOR them with their XTS tweak - * values, and save the tweaks on the stack for later. Then - * de-interleave the 'x' and 'y' elements of each block, i.e. make it so - * that the X[0-3] registers contain only the second halves of blocks, - * and the Y[0-3] registers contain only the first halves of blocks. - * (Speck uses the order (y, x) rather than the more intuitive (x, y).) - */ - mov r12, sp -.if \n == 64 - _xts128_precrypt_one X0, r12, TMP0 - _xts128_precrypt_one Y0, r12, TMP0 - _xts128_precrypt_one X1, r12, TMP0 - _xts128_precrypt_one Y1, r12, TMP0 - _xts128_precrypt_one X2, r12, TMP0 - _xts128_precrypt_one Y2, r12, TMP0 - _xts128_precrypt_one X3, r12, TMP0 - _xts128_precrypt_one Y3, r12, TMP0 - vswp X0_L, Y0_H - vswp X1_L, Y1_H - vswp X2_L, Y2_H - vswp X3_L, Y3_H -.else - _xts64_precrypt_two X0, r12, TMP0 - _xts64_precrypt_two Y0, r12, TMP0 - _xts64_precrypt_two X1, r12, TMP0 - _xts64_precrypt_two Y1, r12, TMP0 - _xts64_precrypt_two X2, r12, TMP0 - _xts64_precrypt_two Y2, r12, TMP0 - _xts64_precrypt_two X3, r12, TMP0 - _xts64_precrypt_two Y3, r12, TMP0 - vuzp.32 Y0, X0 - vuzp.32 Y1, X1 - vuzp.32 Y2, X2 - vuzp.32 Y3, X3 -.endif - - // Do the cipher rounds - - mov r12, ROUND_KEYS - mov r6, NROUNDS - -.Lnext_round_\@: -.if \decrypting -.if \n == 64 - vld1.64 ROUND_KEY_L, [r12] - sub r12, #8 - vmov ROUND_KEY_H, ROUND_KEY_L -.else - vld1.32 {ROUND_KEY_L[],ROUND_KEY_H[]}, [r12] - sub r12, #4 -.endif - _speck_unround_128bytes \n -.else -.if \n == 64 - vld1.64 ROUND_KEY_L, [r12]! - vmov ROUND_KEY_H, ROUND_KEY_L -.else - vld1.32 {ROUND_KEY_L[],ROUND_KEY_H[]}, [r12]! -.endif - _speck_round_128bytes \n -.endif - subs r6, r6, #1 - bne .Lnext_round_\@ - - // Re-interleave the 'x' and 'y' elements of each block -.if \n == 64 - vswp X0_L, Y0_H - vswp X1_L, Y1_H - vswp X2_L, Y2_H - vswp X3_L, Y3_H -.else - vzip.32 Y0, X0 - vzip.32 Y1, X1 - vzip.32 Y2, X2 - vzip.32 Y3, X3 -.endif - - // XOR the encrypted/decrypted blocks with the tweaks we saved earlier - mov r12, sp - vld1.8 {TMP0, TMP1}, [r12:128]! - vld1.8 {TMP2, TMP3}, [r12:128]! - veor X0, TMP0 - veor Y0, TMP1 - veor X1, TMP2 - veor Y1, TMP3 - vld1.8 {TMP0, TMP1}, [r12:128]! - vld1.8 {TMP2, TMP3}, [r12:128]! - veor X2, TMP0 - veor Y2, TMP1 - veor X3, TMP2 - veor Y3, TMP3 - - // Store the ciphertext in the destination buffer - vst1.8 {X0, Y0}, [DST]! - vst1.8 {X1, Y1}, [DST]! - vst1.8 {X2, Y2}, [DST]! - vst1.8 {X3, Y3}, [DST]! - - // Continue if there are more 128-byte chunks remaining, else return - subs NBYTES, #128 - bne .Lnext_128bytes_\@ - - // Store the next tweak -.if \n == 64 - vst1.8 {TWEAKV}, [TWEAK] -.else - vst1.8 {TWEAKV_L}, [TWEAK] -.endif - - mov sp, r7 - pop {r4-r7} - bx lr -.endm - -ENTRY(speck128_xts_encrypt_neon) - _speck_xts_crypt n=64, decrypting=0 -ENDPROC(speck128_xts_encrypt_neon) - -ENTRY(speck128_xts_decrypt_neon) - _speck_xts_crypt n=64, decrypting=1 -ENDPROC(speck128_xts_decrypt_neon) - -ENTRY(speck64_xts_encrypt_neon) - _speck_xts_crypt n=32, decrypting=0 -ENDPROC(speck64_xts_encrypt_neon) - -ENTRY(speck64_xts_decrypt_neon) - _speck_xts_crypt n=32, decrypting=1 -ENDPROC(speck64_xts_decrypt_neon) diff --git a/arch/arm/crypto/speck-neon-glue.c b/arch/arm/crypto/speck-neon-glue.c deleted file mode 100644 index ea36c3a6f1d9..000000000000 --- a/arch/arm/crypto/speck-neon-glue.c +++ /dev/null @@ -1,314 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS - * - * Copyright (c) 2018 Google, Inc - * - * Note: the NIST recommendation for XTS only specifies a 128-bit block size, - * but a 64-bit version (needed for Speck64) is fairly straightforward; the math - * is just done in GF(2^64) instead of GF(2^128), with the reducing polynomial - * x^64 + x^4 + x^3 + x + 1 from the original XEX paper (Rogaway, 2004: - * "Efficient Instantiations of Tweakable Blockciphers and Refinements to Modes - * OCB and PMAC"), represented as 0x1B. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* The assembly functions only handle multiples of 128 bytes */ -#define SPECK_NEON_CHUNK_SIZE 128 - -/* Speck128 */ - -struct speck128_xts_tfm_ctx { - struct speck128_tfm_ctx main_key; - struct speck128_tfm_ctx tweak_key; -}; - -asmlinkage void speck128_xts_encrypt_neon(const u64 *round_keys, int nrounds, - void *dst, const void *src, - unsigned int nbytes, void *tweak); - -asmlinkage void speck128_xts_decrypt_neon(const u64 *round_keys, int nrounds, - void *dst, const void *src, - unsigned int nbytes, void *tweak); - -typedef void (*speck128_crypt_one_t)(const struct speck128_tfm_ctx *, - u8 *, const u8 *); -typedef void (*speck128_xts_crypt_many_t)(const u64 *, int, void *, - const void *, unsigned int, void *); - -static __always_inline int -__speck128_xts_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes, - speck128_crypt_one_t crypt_one, - speck128_xts_crypt_many_t crypt_many) -{ - struct crypto_blkcipher *tfm = desc->tfm; - const struct speck128_xts_tfm_ctx *ctx = crypto_blkcipher_ctx(tfm); - struct blkcipher_walk walk; - le128 tweak; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, SPECK_NEON_CHUNK_SIZE); - - crypto_speck128_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv); - - while (walk.nbytes > 0) { - unsigned int nbytes = walk.nbytes; - u8 *dst = walk.dst.virt.addr; - const u8 *src = walk.src.virt.addr; - - if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) { - unsigned int count; - - count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE); - kernel_neon_begin(); - (*crypt_many)(ctx->main_key.round_keys, - ctx->main_key.nrounds, - dst, src, count, &tweak); - kernel_neon_end(); - dst += count; - src += count; - nbytes -= count; - } - - /* Handle any remainder with generic code */ - while (nbytes >= sizeof(tweak)) { - le128_xor((le128 *)dst, (const le128 *)src, &tweak); - (*crypt_one)(&ctx->main_key, dst, dst); - le128_xor((le128 *)dst, (const le128 *)dst, &tweak); - gf128mul_x_ble((be128 *)&tweak, (const be128 *)&tweak); - - dst += sizeof(tweak); - src += sizeof(tweak); - nbytes -= sizeof(tweak); - } - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - return err; -} - -static int speck128_xts_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, - unsigned int nbytes) -{ - return __speck128_xts_crypt(desc, dst, src, nbytes, - crypto_speck128_encrypt, - speck128_xts_encrypt_neon); -} - -static int speck128_xts_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, - unsigned int nbytes) -{ - return __speck128_xts_crypt(desc, dst, src, nbytes, - crypto_speck128_decrypt, - speck128_xts_decrypt_neon); -} - -static int speck128_xts_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct speck128_xts_tfm_ctx *ctx = crypto_tfm_ctx(tfm); - int err; - - if (keylen % 2) - return -EINVAL; - - keylen /= 2; - - err = crypto_speck128_setkey(&ctx->main_key, key, keylen); - if (err) - return err; - - return crypto_speck128_setkey(&ctx->tweak_key, key + keylen, keylen); -} - -/* Speck64 */ - -struct speck64_xts_tfm_ctx { - struct speck64_tfm_ctx main_key; - struct speck64_tfm_ctx tweak_key; -}; - -asmlinkage void speck64_xts_encrypt_neon(const u32 *round_keys, int nrounds, - void *dst, const void *src, - unsigned int nbytes, void *tweak); - -asmlinkage void speck64_xts_decrypt_neon(const u32 *round_keys, int nrounds, - void *dst, const void *src, - unsigned int nbytes, void *tweak); - -typedef void (*speck64_crypt_one_t)(const struct speck64_tfm_ctx *, - u8 *, const u8 *); -typedef void (*speck64_xts_crypt_many_t)(const u32 *, int, void *, - const void *, unsigned int, void *); - -static __always_inline int -__speck64_xts_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes, - speck64_crypt_one_t crypt_one, - speck64_xts_crypt_many_t crypt_many) -{ - struct crypto_blkcipher *tfm = desc->tfm; - const struct speck64_xts_tfm_ctx *ctx = crypto_blkcipher_ctx(tfm); - struct blkcipher_walk walk; - __le64 tweak; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, SPECK_NEON_CHUNK_SIZE); - - crypto_speck64_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv); - - while (walk.nbytes > 0) { - unsigned int nbytes = walk.nbytes; - u8 *dst = walk.dst.virt.addr; - const u8 *src = walk.src.virt.addr; - - if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) { - unsigned int count; - - count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE); - kernel_neon_begin(); - (*crypt_many)(ctx->main_key.round_keys, - ctx->main_key.nrounds, - dst, src, count, &tweak); - kernel_neon_end(); - dst += count; - src += count; - nbytes -= count; - } - - /* Handle any remainder with generic code */ - while (nbytes >= sizeof(tweak)) { - *(__le64 *)dst = *(__le64 *)src ^ tweak; - (*crypt_one)(&ctx->main_key, dst, dst); - *(__le64 *)dst ^= tweak; - tweak = cpu_to_le64((le64_to_cpu(tweak) << 1) ^ - ((tweak & cpu_to_le64(1ULL << 63)) ? - 0x1B : 0)); - dst += sizeof(tweak); - src += sizeof(tweak); - nbytes -= sizeof(tweak); - } - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - return err; -} - -static int speck64_xts_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) -{ - return __speck64_xts_crypt(desc, dst, src, nbytes, - crypto_speck64_encrypt, - speck64_xts_encrypt_neon); -} - -static int speck64_xts_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) -{ - return __speck64_xts_crypt(desc, dst, src, nbytes, - crypto_speck64_decrypt, - speck64_xts_decrypt_neon); -} - -static int speck64_xts_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct speck64_xts_tfm_ctx *ctx = crypto_tfm_ctx(tfm); - int err; - - if (keylen % 2) - return -EINVAL; - - keylen /= 2; - - err = crypto_speck64_setkey(&ctx->main_key, key, keylen); - if (err) - return err; - - return crypto_speck64_setkey(&ctx->tweak_key, key + keylen, keylen); -} - -static struct crypto_alg speck_algs[] = { - { - .cra_name = "xts(speck128)", - .cra_driver_name = "xts-speck128-neon", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = SPECK128_BLOCK_SIZE, - .cra_type = &crypto_blkcipher_type, - .cra_ctxsize = sizeof(struct speck128_xts_tfm_ctx), - .cra_alignmask = 7, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = 2 * SPECK128_128_KEY_SIZE, - .max_keysize = 2 * SPECK128_256_KEY_SIZE, - .ivsize = SPECK128_BLOCK_SIZE, - .setkey = speck128_xts_setkey, - .encrypt = speck128_xts_encrypt, - .decrypt = speck128_xts_decrypt, - } - } - }, { - .cra_name = "xts(speck64)", - .cra_driver_name = "xts-speck64-neon", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = SPECK64_BLOCK_SIZE, - .cra_type = &crypto_blkcipher_type, - .cra_ctxsize = sizeof(struct speck64_xts_tfm_ctx), - .cra_alignmask = 7, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = 2 * SPECK64_96_KEY_SIZE, - .max_keysize = 2 * SPECK64_128_KEY_SIZE, - .ivsize = SPECK64_BLOCK_SIZE, - .setkey = speck64_xts_setkey, - .encrypt = speck64_xts_encrypt, - .decrypt = speck64_xts_decrypt, - } - } - } -}; - -static int __init speck_neon_module_init(void) -{ - if (!(elf_hwcap & HWCAP_NEON)) - return -ENODEV; - return crypto_register_algs(speck_algs, ARRAY_SIZE(speck_algs)); -} - -static void __exit speck_neon_module_exit(void) -{ - crypto_unregister_algs(speck_algs, ARRAY_SIZE(speck_algs)); -} - -module_init(speck_neon_module_init); -module_exit(speck_neon_module_exit); - -MODULE_DESCRIPTION("Speck block cipher (NEON-accelerated)"); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Eric Biggers "); -MODULE_ALIAS_CRYPTO("xts(speck128)"); -MODULE_ALIAS_CRYPTO("xts-speck128-neon"); -MODULE_ALIAS_CRYPTO("xts(speck64)"); -MODULE_ALIAS_CRYPTO("xts-speck64-neon"); From 96976f956db793c2cee2e8dc4507e2001d3ab34f Mon Sep 17 00:00:00 2001 From: Alistair Strachan Date: Mon, 22 Oct 2018 14:15:33 -0700 Subject: [PATCH 009/189] Revert "FROMGIT: crypto: speck - export common helpers" This reverts commit e86420efb2dfeee0152e125765134d423759bd5a. Bug: 116008047 Change-Id: I9d0a8357be1ab090a793646716771015299fb7fe Signed-off-by: Alistair Strachan --- crypto/speck.c | 90 +++++++++++++++++++----------------------- include/crypto/speck.h | 62 ----------------------------- 2 files changed, 41 insertions(+), 111 deletions(-) delete mode 100644 include/crypto/speck.h diff --git a/crypto/speck.c b/crypto/speck.c index 58aa9f7f91f7..4e80ad76bcd7 100644 --- a/crypto/speck.c +++ b/crypto/speck.c @@ -24,7 +24,6 @@ */ #include -#include #include #include #include @@ -32,6 +31,22 @@ /* Speck128 */ +#define SPECK128_BLOCK_SIZE 16 + +#define SPECK128_128_KEY_SIZE 16 +#define SPECK128_128_NROUNDS 32 + +#define SPECK128_192_KEY_SIZE 24 +#define SPECK128_192_NROUNDS 33 + +#define SPECK128_256_KEY_SIZE 32 +#define SPECK128_256_NROUNDS 34 + +struct speck128_tfm_ctx { + u64 round_keys[SPECK128_256_NROUNDS]; + int nrounds; +}; + static __always_inline void speck128_round(u64 *x, u64 *y, u64 k) { *x = ror64(*x, 8); @@ -50,9 +65,9 @@ static __always_inline void speck128_unround(u64 *x, u64 *y, u64 k) *x = rol64(*x, 8); } -void crypto_speck128_encrypt(const struct speck128_tfm_ctx *ctx, - u8 *out, const u8 *in) +static void speck128_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { + const struct speck128_tfm_ctx *ctx = crypto_tfm_ctx(tfm); u64 y = get_unaligned_le64(in); u64 x = get_unaligned_le64(in + 8); int i; @@ -63,16 +78,10 @@ void crypto_speck128_encrypt(const struct speck128_tfm_ctx *ctx, put_unaligned_le64(y, out); put_unaligned_le64(x, out + 8); } -EXPORT_SYMBOL_GPL(crypto_speck128_encrypt); -static void speck128_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) -{ - crypto_speck128_encrypt(crypto_tfm_ctx(tfm), out, in); -} - -void crypto_speck128_decrypt(const struct speck128_tfm_ctx *ctx, - u8 *out, const u8 *in) +static void speck128_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { + const struct speck128_tfm_ctx *ctx = crypto_tfm_ctx(tfm); u64 y = get_unaligned_le64(in); u64 x = get_unaligned_le64(in + 8); int i; @@ -83,16 +92,11 @@ void crypto_speck128_decrypt(const struct speck128_tfm_ctx *ctx, put_unaligned_le64(y, out); put_unaligned_le64(x, out + 8); } -EXPORT_SYMBOL_GPL(crypto_speck128_decrypt); -static void speck128_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) -{ - crypto_speck128_decrypt(crypto_tfm_ctx(tfm), out, in); -} - -int crypto_speck128_setkey(struct speck128_tfm_ctx *ctx, const u8 *key, +static int speck128_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) { + struct speck128_tfm_ctx *ctx = crypto_tfm_ctx(tfm); u64 l[3]; u64 k; int i; @@ -134,16 +138,22 @@ int crypto_speck128_setkey(struct speck128_tfm_ctx *ctx, const u8 *key, return 0; } -EXPORT_SYMBOL_GPL(crypto_speck128_setkey); - -static int speck128_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - return crypto_speck128_setkey(crypto_tfm_ctx(tfm), key, keylen); -} /* Speck64 */ +#define SPECK64_BLOCK_SIZE 8 + +#define SPECK64_96_KEY_SIZE 12 +#define SPECK64_96_NROUNDS 26 + +#define SPECK64_128_KEY_SIZE 16 +#define SPECK64_128_NROUNDS 27 + +struct speck64_tfm_ctx { + u32 round_keys[SPECK64_128_NROUNDS]; + int nrounds; +}; + static __always_inline void speck64_round(u32 *x, u32 *y, u32 k) { *x = ror32(*x, 8); @@ -162,9 +172,9 @@ static __always_inline void speck64_unround(u32 *x, u32 *y, u32 k) *x = rol32(*x, 8); } -void crypto_speck64_encrypt(const struct speck64_tfm_ctx *ctx, - u8 *out, const u8 *in) +static void speck64_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { + const struct speck64_tfm_ctx *ctx = crypto_tfm_ctx(tfm); u32 y = get_unaligned_le32(in); u32 x = get_unaligned_le32(in + 4); int i; @@ -175,16 +185,10 @@ void crypto_speck64_encrypt(const struct speck64_tfm_ctx *ctx, put_unaligned_le32(y, out); put_unaligned_le32(x, out + 4); } -EXPORT_SYMBOL_GPL(crypto_speck64_encrypt); -static void speck64_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) -{ - crypto_speck64_encrypt(crypto_tfm_ctx(tfm), out, in); -} - -void crypto_speck64_decrypt(const struct speck64_tfm_ctx *ctx, - u8 *out, const u8 *in) +static void speck64_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { + const struct speck64_tfm_ctx *ctx = crypto_tfm_ctx(tfm); u32 y = get_unaligned_le32(in); u32 x = get_unaligned_le32(in + 4); int i; @@ -195,16 +199,11 @@ void crypto_speck64_decrypt(const struct speck64_tfm_ctx *ctx, put_unaligned_le32(y, out); put_unaligned_le32(x, out + 4); } -EXPORT_SYMBOL_GPL(crypto_speck64_decrypt); -static void speck64_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) -{ - crypto_speck64_decrypt(crypto_tfm_ctx(tfm), out, in); -} - -int crypto_speck64_setkey(struct speck64_tfm_ctx *ctx, const u8 *key, +static int speck64_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) { + struct speck64_tfm_ctx *ctx = crypto_tfm_ctx(tfm); u32 l[3]; u32 k; int i; @@ -237,13 +236,6 @@ int crypto_speck64_setkey(struct speck64_tfm_ctx *ctx, const u8 *key, return 0; } -EXPORT_SYMBOL_GPL(crypto_speck64_setkey); - -static int speck64_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - return crypto_speck64_setkey(crypto_tfm_ctx(tfm), key, keylen); -} /* Algorithm definitions */ diff --git a/include/crypto/speck.h b/include/crypto/speck.h deleted file mode 100644 index 73cfc952d405..000000000000 --- a/include/crypto/speck.h +++ /dev/null @@ -1,62 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Common values for the Speck algorithm - */ - -#ifndef _CRYPTO_SPECK_H -#define _CRYPTO_SPECK_H - -#include - -/* Speck128 */ - -#define SPECK128_BLOCK_SIZE 16 - -#define SPECK128_128_KEY_SIZE 16 -#define SPECK128_128_NROUNDS 32 - -#define SPECK128_192_KEY_SIZE 24 -#define SPECK128_192_NROUNDS 33 - -#define SPECK128_256_KEY_SIZE 32 -#define SPECK128_256_NROUNDS 34 - -struct speck128_tfm_ctx { - u64 round_keys[SPECK128_256_NROUNDS]; - int nrounds; -}; - -void crypto_speck128_encrypt(const struct speck128_tfm_ctx *ctx, - u8 *out, const u8 *in); - -void crypto_speck128_decrypt(const struct speck128_tfm_ctx *ctx, - u8 *out, const u8 *in); - -int crypto_speck128_setkey(struct speck128_tfm_ctx *ctx, const u8 *key, - unsigned int keysize); - -/* Speck64 */ - -#define SPECK64_BLOCK_SIZE 8 - -#define SPECK64_96_KEY_SIZE 12 -#define SPECK64_96_NROUNDS 26 - -#define SPECK64_128_KEY_SIZE 16 -#define SPECK64_128_NROUNDS 27 - -struct speck64_tfm_ctx { - u32 round_keys[SPECK64_128_NROUNDS]; - int nrounds; -}; - -void crypto_speck64_encrypt(const struct speck64_tfm_ctx *ctx, - u8 *out, const u8 *in); - -void crypto_speck64_decrypt(const struct speck64_tfm_ctx *ctx, - u8 *out, const u8 *in); - -int crypto_speck64_setkey(struct speck64_tfm_ctx *ctx, const u8 *key, - unsigned int keysize); - -#endif /* _CRYPTO_SPECK_H */ From ced713fdd6a3f36f4bebd24f551203d1fd6d22e7 Mon Sep 17 00:00:00 2001 From: Alistair Strachan Date: Mon, 22 Oct 2018 14:15:58 -0700 Subject: [PATCH 010/189] Revert "BACKPORT, FROMGIT: crypto: speck - add support for the Speck block cipher" This reverts commit 1decfa0b7db0512fcf6e55abc821d58773510a24. Bug: 116008047 Change-Id: If9192b30cdb4212fb6c8111d70c532a109695fbd Signed-off-by: Alistair Strachan --- crypto/Kconfig | 14 --- crypto/Makefile | 1 - crypto/speck.c | 299 ----------------------------------------------- crypto/testmgr.c | 30 ----- crypto/testmgr.h | 128 -------------------- 5 files changed, 472 deletions(-) delete mode 100644 crypto/speck.c diff --git a/crypto/Kconfig b/crypto/Kconfig index ba8ca7cedb7e..7df8c42a3e01 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1390,20 +1390,6 @@ config CRYPTO_SERPENT_AVX2_X86_64 See also: -config CRYPTO_SPECK - tristate "Speck cipher algorithm" - select CRYPTO_ALGAPI - help - Speck is a lightweight block cipher that is tuned for optimal - performance in software (rather than hardware). - - Speck may not be as secure as AES, and should only be used on systems - where AES is not fast enough. - - See also: - - If unsure, say N. - config CRYPTO_TEA tristate "TEA, XTEA and XETA cipher algorithms" select CRYPTO_ALGAPI diff --git a/crypto/Makefile b/crypto/Makefile index 74f36e7d163f..34a6b0bbccef 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -98,7 +98,6 @@ obj-$(CONFIG_CRYPTO_TEA) += tea.o obj-$(CONFIG_CRYPTO_KHAZAD) += khazad.o obj-$(CONFIG_CRYPTO_ANUBIS) += anubis.o obj-$(CONFIG_CRYPTO_SEED) += seed.o -obj-$(CONFIG_CRYPTO_SPECK) += speck.o obj-$(CONFIG_CRYPTO_SALSA20) += salsa20_generic.o obj-$(CONFIG_CRYPTO_CHACHA20) += chacha20_generic.o obj-$(CONFIG_CRYPTO_POLY1305) += poly1305_generic.o diff --git a/crypto/speck.c b/crypto/speck.c deleted file mode 100644 index 4e80ad76bcd7..000000000000 --- a/crypto/speck.c +++ /dev/null @@ -1,299 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Speck: a lightweight block cipher - * - * Copyright (c) 2018 Google, Inc - * - * Speck has 10 variants, including 5 block sizes. For now we only implement - * the variants Speck128/128, Speck128/192, Speck128/256, Speck64/96, and - * Speck64/128. Speck${B}/${K} denotes the variant with a block size of B bits - * and a key size of K bits. The Speck128 variants are believed to be the most - * secure variants, and they use the same block size and key sizes as AES. The - * Speck64 variants are less secure, but on 32-bit processors are usually - * faster. The remaining variants (Speck32, Speck48, and Speck96) are even less - * secure and/or not as well suited for implementation on either 32-bit or - * 64-bit processors, so are omitted. - * - * Reference: "The Simon and Speck Families of Lightweight Block Ciphers" - * https://eprint.iacr.org/2013/404.pdf - * - * In a correspondence, the Speck designers have also clarified that the words - * should be interpreted in little-endian format, and the words should be - * ordered such that the first word of each block is 'y' rather than 'x', and - * the first key word (rather than the last) becomes the first round key. - */ - -#include -#include -#include -#include -#include - -/* Speck128 */ - -#define SPECK128_BLOCK_SIZE 16 - -#define SPECK128_128_KEY_SIZE 16 -#define SPECK128_128_NROUNDS 32 - -#define SPECK128_192_KEY_SIZE 24 -#define SPECK128_192_NROUNDS 33 - -#define SPECK128_256_KEY_SIZE 32 -#define SPECK128_256_NROUNDS 34 - -struct speck128_tfm_ctx { - u64 round_keys[SPECK128_256_NROUNDS]; - int nrounds; -}; - -static __always_inline void speck128_round(u64 *x, u64 *y, u64 k) -{ - *x = ror64(*x, 8); - *x += *y; - *x ^= k; - *y = rol64(*y, 3); - *y ^= *x; -} - -static __always_inline void speck128_unround(u64 *x, u64 *y, u64 k) -{ - *y ^= *x; - *y = ror64(*y, 3); - *x ^= k; - *x -= *y; - *x = rol64(*x, 8); -} - -static void speck128_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) -{ - const struct speck128_tfm_ctx *ctx = crypto_tfm_ctx(tfm); - u64 y = get_unaligned_le64(in); - u64 x = get_unaligned_le64(in + 8); - int i; - - for (i = 0; i < ctx->nrounds; i++) - speck128_round(&x, &y, ctx->round_keys[i]); - - put_unaligned_le64(y, out); - put_unaligned_le64(x, out + 8); -} - -static void speck128_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) -{ - const struct speck128_tfm_ctx *ctx = crypto_tfm_ctx(tfm); - u64 y = get_unaligned_le64(in); - u64 x = get_unaligned_le64(in + 8); - int i; - - for (i = ctx->nrounds - 1; i >= 0; i--) - speck128_unround(&x, &y, ctx->round_keys[i]); - - put_unaligned_le64(y, out); - put_unaligned_le64(x, out + 8); -} - -static int speck128_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct speck128_tfm_ctx *ctx = crypto_tfm_ctx(tfm); - u64 l[3]; - u64 k; - int i; - - switch (keylen) { - case SPECK128_128_KEY_SIZE: - k = get_unaligned_le64(key); - l[0] = get_unaligned_le64(key + 8); - ctx->nrounds = SPECK128_128_NROUNDS; - for (i = 0; i < ctx->nrounds; i++) { - ctx->round_keys[i] = k; - speck128_round(&l[0], &k, i); - } - break; - case SPECK128_192_KEY_SIZE: - k = get_unaligned_le64(key); - l[0] = get_unaligned_le64(key + 8); - l[1] = get_unaligned_le64(key + 16); - ctx->nrounds = SPECK128_192_NROUNDS; - for (i = 0; i < ctx->nrounds; i++) { - ctx->round_keys[i] = k; - speck128_round(&l[i % 2], &k, i); - } - break; - case SPECK128_256_KEY_SIZE: - k = get_unaligned_le64(key); - l[0] = get_unaligned_le64(key + 8); - l[1] = get_unaligned_le64(key + 16); - l[2] = get_unaligned_le64(key + 24); - ctx->nrounds = SPECK128_256_NROUNDS; - for (i = 0; i < ctx->nrounds; i++) { - ctx->round_keys[i] = k; - speck128_round(&l[i % 3], &k, i); - } - break; - default: - return -EINVAL; - } - - return 0; -} - -/* Speck64 */ - -#define SPECK64_BLOCK_SIZE 8 - -#define SPECK64_96_KEY_SIZE 12 -#define SPECK64_96_NROUNDS 26 - -#define SPECK64_128_KEY_SIZE 16 -#define SPECK64_128_NROUNDS 27 - -struct speck64_tfm_ctx { - u32 round_keys[SPECK64_128_NROUNDS]; - int nrounds; -}; - -static __always_inline void speck64_round(u32 *x, u32 *y, u32 k) -{ - *x = ror32(*x, 8); - *x += *y; - *x ^= k; - *y = rol32(*y, 3); - *y ^= *x; -} - -static __always_inline void speck64_unround(u32 *x, u32 *y, u32 k) -{ - *y ^= *x; - *y = ror32(*y, 3); - *x ^= k; - *x -= *y; - *x = rol32(*x, 8); -} - -static void speck64_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) -{ - const struct speck64_tfm_ctx *ctx = crypto_tfm_ctx(tfm); - u32 y = get_unaligned_le32(in); - u32 x = get_unaligned_le32(in + 4); - int i; - - for (i = 0; i < ctx->nrounds; i++) - speck64_round(&x, &y, ctx->round_keys[i]); - - put_unaligned_le32(y, out); - put_unaligned_le32(x, out + 4); -} - -static void speck64_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) -{ - const struct speck64_tfm_ctx *ctx = crypto_tfm_ctx(tfm); - u32 y = get_unaligned_le32(in); - u32 x = get_unaligned_le32(in + 4); - int i; - - for (i = ctx->nrounds - 1; i >= 0; i--) - speck64_unround(&x, &y, ctx->round_keys[i]); - - put_unaligned_le32(y, out); - put_unaligned_le32(x, out + 4); -} - -static int speck64_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct speck64_tfm_ctx *ctx = crypto_tfm_ctx(tfm); - u32 l[3]; - u32 k; - int i; - - switch (keylen) { - case SPECK64_96_KEY_SIZE: - k = get_unaligned_le32(key); - l[0] = get_unaligned_le32(key + 4); - l[1] = get_unaligned_le32(key + 8); - ctx->nrounds = SPECK64_96_NROUNDS; - for (i = 0; i < ctx->nrounds; i++) { - ctx->round_keys[i] = k; - speck64_round(&l[i % 2], &k, i); - } - break; - case SPECK64_128_KEY_SIZE: - k = get_unaligned_le32(key); - l[0] = get_unaligned_le32(key + 4); - l[1] = get_unaligned_le32(key + 8); - l[2] = get_unaligned_le32(key + 12); - ctx->nrounds = SPECK64_128_NROUNDS; - for (i = 0; i < ctx->nrounds; i++) { - ctx->round_keys[i] = k; - speck64_round(&l[i % 3], &k, i); - } - break; - default: - return -EINVAL; - } - - return 0; -} - -/* Algorithm definitions */ - -static struct crypto_alg speck_algs[] = { - { - .cra_name = "speck128", - .cra_driver_name = "speck128-generic", - .cra_priority = 100, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = SPECK128_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct speck128_tfm_ctx), - .cra_module = THIS_MODULE, - .cra_u = { - .cipher = { - .cia_min_keysize = SPECK128_128_KEY_SIZE, - .cia_max_keysize = SPECK128_256_KEY_SIZE, - .cia_setkey = speck128_setkey, - .cia_encrypt = speck128_encrypt, - .cia_decrypt = speck128_decrypt - } - } - }, { - .cra_name = "speck64", - .cra_driver_name = "speck64-generic", - .cra_priority = 100, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = SPECK64_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct speck64_tfm_ctx), - .cra_module = THIS_MODULE, - .cra_u = { - .cipher = { - .cia_min_keysize = SPECK64_96_KEY_SIZE, - .cia_max_keysize = SPECK64_128_KEY_SIZE, - .cia_setkey = speck64_setkey, - .cia_encrypt = speck64_encrypt, - .cia_decrypt = speck64_decrypt - } - } - } -}; - -static int __init speck_module_init(void) -{ - return crypto_register_algs(speck_algs, ARRAY_SIZE(speck_algs)); -} - -static void __exit speck_module_exit(void) -{ - crypto_unregister_algs(speck_algs, ARRAY_SIZE(speck_algs)); -} - -module_init(speck_module_init); -module_exit(speck_module_exit); - -MODULE_DESCRIPTION("Speck block cipher (generic)"); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Eric Biggers "); -MODULE_ALIAS_CRYPTO("speck128"); -MODULE_ALIAS_CRYPTO("speck128-generic"); -MODULE_ALIAS_CRYPTO("speck64"); -MODULE_ALIAS_CRYPTO("speck64-generic"); diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 4760b564dc17..88f1b6cb4dec 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -3113,36 +3113,6 @@ static const struct alg_test_desc alg_test_descs[] = { } } } - }, { - .alg = "ecb(speck128)", - .test = alg_test_skcipher, - .suite = { - .cipher = { - .enc = { - .vecs = speck128_enc_tv_template, - .count = ARRAY_SIZE(speck128_enc_tv_template) - }, - .dec = { - .vecs = speck128_dec_tv_template, - .count = ARRAY_SIZE(speck128_dec_tv_template) - } - } - } - }, { - .alg = "ecb(speck64)", - .test = alg_test_skcipher, - .suite = { - .cipher = { - .enc = { - .vecs = speck64_enc_tv_template, - .count = ARRAY_SIZE(speck64_enc_tv_template) - }, - .dec = { - .vecs = speck64_dec_tv_template, - .count = ARRAY_SIZE(speck64_dec_tv_template) - } - } - } }, { .alg = "ecb(tea)", .test = alg_test_skcipher, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 9b59a21a89e8..9aad61534246 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -12589,134 +12589,6 @@ static struct cipher_testvec serpent_xts_dec_tv_template[] = { }, }; -/* - * Speck test vectors taken from the original paper: - * "The Simon and Speck Families of Lightweight Block Ciphers" - * https://eprint.iacr.org/2013/404.pdf - * - * Note that the paper does not make byte and word order clear. But it was - * confirmed with the authors that the intended orders are little endian byte - * order and (y, x) word order. Equivalently, the printed test vectors, when - * looking at only the bytes (ignoring the whitespace that divides them into - * words), are backwards: the left-most byte is actually the one with the - * highest memory address, while the right-most byte is actually the one with - * the lowest memory address. - */ - -static struct cipher_testvec speck128_enc_tv_template[] = { - { /* Speck128/128 */ - .key = "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", - .klen = 16, - .input = "\x20\x6d\x61\x64\x65\x20\x69\x74" - "\x20\x65\x71\x75\x69\x76\x61\x6c", - .ilen = 16, - .result = "\x18\x0d\x57\x5c\xdf\xfe\x60\x78" - "\x65\x32\x78\x79\x51\x98\x5d\xa6", - .rlen = 16, - }, { /* Speck128/192 */ - .key = "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17", - .klen = 24, - .input = "\x65\x6e\x74\x20\x74\x6f\x20\x43" - "\x68\x69\x65\x66\x20\x48\x61\x72", - .ilen = 16, - .result = "\x86\x18\x3c\xe0\x5d\x18\xbc\xf9" - "\x66\x55\x13\x13\x3a\xcf\xe4\x1b", - .rlen = 16, - }, { /* Speck128/256 */ - .key = "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", - .klen = 32, - .input = "\x70\x6f\x6f\x6e\x65\x72\x2e\x20" - "\x49\x6e\x20\x74\x68\x6f\x73\x65", - .ilen = 16, - .result = "\x43\x8f\x18\x9c\x8d\xb4\xee\x4e" - "\x3e\xf5\xc0\x05\x04\x01\x09\x41", - .rlen = 16, - }, -}; - -static struct cipher_testvec speck128_dec_tv_template[] = { - { /* Speck128/128 */ - .key = "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", - .klen = 16, - .input = "\x18\x0d\x57\x5c\xdf\xfe\x60\x78" - "\x65\x32\x78\x79\x51\x98\x5d\xa6", - .ilen = 16, - .result = "\x20\x6d\x61\x64\x65\x20\x69\x74" - "\x20\x65\x71\x75\x69\x76\x61\x6c", - .rlen = 16, - }, { /* Speck128/192 */ - .key = "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17", - .klen = 24, - .input = "\x86\x18\x3c\xe0\x5d\x18\xbc\xf9" - "\x66\x55\x13\x13\x3a\xcf\xe4\x1b", - .ilen = 16, - .result = "\x65\x6e\x74\x20\x74\x6f\x20\x43" - "\x68\x69\x65\x66\x20\x48\x61\x72", - .rlen = 16, - }, { /* Speck128/256 */ - .key = "\x00\x01\x02\x03\x04\x05\x06\x07" - "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - "\x10\x11\x12\x13\x14\x15\x16\x17" - "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", - .klen = 32, - .input = "\x43\x8f\x18\x9c\x8d\xb4\xee\x4e" - "\x3e\xf5\xc0\x05\x04\x01\x09\x41", - .ilen = 16, - .result = "\x70\x6f\x6f\x6e\x65\x72\x2e\x20" - "\x49\x6e\x20\x74\x68\x6f\x73\x65", - .rlen = 16, - }, -}; - -static struct cipher_testvec speck64_enc_tv_template[] = { - { /* Speck64/96 */ - .key = "\x00\x01\x02\x03\x08\x09\x0a\x0b" - "\x10\x11\x12\x13", - .klen = 12, - .input = "\x65\x61\x6e\x73\x20\x46\x61\x74", - .ilen = 8, - .result = "\x6c\x94\x75\x41\xec\x52\x79\x9f", - .rlen = 8, - }, { /* Speck64/128 */ - .key = "\x00\x01\x02\x03\x08\x09\x0a\x0b" - "\x10\x11\x12\x13\x18\x19\x1a\x1b", - .klen = 16, - .input = "\x2d\x43\x75\x74\x74\x65\x72\x3b", - .ilen = 8, - .result = "\x8b\x02\x4e\x45\x48\xa5\x6f\x8c", - .rlen = 8, - }, -}; - -static struct cipher_testvec speck64_dec_tv_template[] = { - { /* Speck64/96 */ - .key = "\x00\x01\x02\x03\x08\x09\x0a\x0b" - "\x10\x11\x12\x13", - .klen = 12, - .input = "\x6c\x94\x75\x41\xec\x52\x79\x9f", - .ilen = 8, - .result = "\x65\x61\x6e\x73\x20\x46\x61\x74", - .rlen = 8, - }, { /* Speck64/128 */ - .key = "\x00\x01\x02\x03\x08\x09\x0a\x0b" - "\x10\x11\x12\x13\x18\x19\x1a\x1b", - .klen = 16, - .input = "\x8b\x02\x4e\x45\x48\xa5\x6f\x8c", - .ilen = 8, - .result = "\x2d\x43\x75\x74\x74\x65\x72\x3b", - .rlen = 8, - }, -}; - /* Cast6 test vectors from RFC 2612 */ #define CAST6_ENC_TEST_VECTORS 4 #define CAST6_DEC_TEST_VECTORS 4 From fd73fe985329b36bbb1e0070d67da639c12b187c Mon Sep 17 00:00:00 2001 From: Alistair Strachan Date: Mon, 22 Oct 2018 19:15:22 -0700 Subject: [PATCH 011/189] Build fix for 076c36fce1ea0. Bug: 116008047 Change-Id: I053866f49e1ec090fde9a6fbf19a34c5e4e6e8e3 Signed-off-by: Alistair Strachan --- fs/crypto/keyinfo.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index b13e968273a4..3cbe0eff8767 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -153,7 +153,6 @@ static struct fscrypt_mode { int keysize; bool logged_impl_name; } available_modes[] = { -<<<<<<< HEAD [FS_ENCRYPTION_MODE_AES_256_XTS] = { .friendly_name = "AES-256-XTS", .cipher_str = "xts(aes)", From c4b00eb7049611bc16a42ca0e21ea80f614ed3a4 Mon Sep 17 00:00:00 2001 From: Alistair Strachan Date: Tue, 23 Oct 2018 12:37:23 -0700 Subject: [PATCH 012/189] Revert "BACKPORT, FROMLIST: fscrypt: add Speck128/256 support" This reverts commit 4b08356a76b859b8f4599bf8821a6702ac50a029. Resolves conflicts with a later CL, e7724207f71e "fscrypt: log the crypto algorithm implementations". [astrachan: This is an update to 076c36fce1ea which reverted only the fscrypt changes, not the ext4 changes, due to a historical bad merge.] Bug: 116008047 Change-Id: I772d78d6e4bd126dcd2b25049c719f8522a1c157 Signed-off-by: Alistair Strachan --- fs/ext4/crypto.c | 12 ++---------- fs/ext4/crypto_fname.c | 6 ++++++ fs/ext4/crypto_key.c | 6 ------ fs/ext4/crypto_policy.c | 14 +++++++++----- fs/ext4/ext4.h | 5 ++--- fs/ext4/ext4_crypto.h | 4 ---- 6 files changed, 19 insertions(+), 28 deletions(-) diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c index f6096ee77662..f240cef8b326 100644 --- a/fs/ext4/crypto.c +++ b/fs/ext4/crypto.c @@ -457,17 +457,9 @@ errout: return err; } -bool ext4_valid_enc_modes(uint32_t contents_mode, uint32_t filenames_mode) +bool ext4_valid_contents_enc_mode(uint32_t mode) { - if (contents_mode == EXT4_ENCRYPTION_MODE_AES_256_XTS) { - return (filenames_mode == EXT4_ENCRYPTION_MODE_AES_256_CTS || - filenames_mode == EXT4_ENCRYPTION_MODE_AES_256_HEH); - } - - if (contents_mode == EXT4_ENCRYPTION_MODE_SPECK128_256_XTS) - return filenames_mode == EXT4_ENCRYPTION_MODE_SPECK128_256_CTS; - - return false; + return (mode == EXT4_ENCRYPTION_MODE_AES_256_XTS); } /** diff --git a/fs/ext4/crypto_fname.c b/fs/ext4/crypto_fname.c index 5e5afb6ef71a..026716bdbbfc 100644 --- a/fs/ext4/crypto_fname.c +++ b/fs/ext4/crypto_fname.c @@ -42,6 +42,12 @@ static void ext4_dir_crypt_complete(struct crypto_async_request *req, int res) complete(&ecr->completion); } +bool ext4_valid_filenames_enc_mode(uint32_t mode) +{ + return (mode == EXT4_ENCRYPTION_MODE_AES_256_CTS || + mode == EXT4_ENCRYPTION_MODE_AES_256_HEH); +} + static unsigned max_name_len(struct inode *inode) { return S_ISLNK(inode->i_mode) ? inode->i_sb->s_blocksize : diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c index 68225223ffd8..14ae7781f2a8 100644 --- a/fs/ext4/crypto_key.c +++ b/fs/ext4/crypto_key.c @@ -258,12 +258,6 @@ int ext4_get_encryption_info(struct inode *inode) case EXT4_ENCRYPTION_MODE_AES_256_HEH: cipher_str = "heh(aes)"; break; - case EXT4_ENCRYPTION_MODE_SPECK128_256_XTS: - cipher_str = "xts(speck128)"; - break; - case EXT4_ENCRYPTION_MODE_SPECK128_256_CTS: - cipher_str = "cts(cbc(speck128))"; - break; default: printk_once(KERN_WARNING "ext4: unsupported key mode %d (ino %u)\n", diff --git a/fs/ext4/crypto_policy.c b/fs/ext4/crypto_policy.c index 818fa45ecf08..e4f4fc4e56ab 100644 --- a/fs/ext4/crypto_policy.c +++ b/fs/ext4/crypto_policy.c @@ -60,12 +60,16 @@ static int ext4_create_encryption_context_from_policy( ctx.format = EXT4_ENCRYPTION_CONTEXT_FORMAT_V1; memcpy(ctx.master_key_descriptor, policy->master_key_descriptor, EXT4_KEY_DESCRIPTOR_SIZE); - if (!ext4_valid_enc_modes(policy->contents_encryption_mode, - policy->filenames_encryption_mode)) { + if (!ext4_valid_contents_enc_mode(policy->contents_encryption_mode)) { printk(KERN_WARNING - "%s: Invalid encryption modes (contents %d, filenames %d)\n", - __func__, policy->contents_encryption_mode, - policy->filenames_encryption_mode); + "%s: Invalid contents encryption mode %d\n", __func__, + policy->contents_encryption_mode); + return -EINVAL; + } + if (!ext4_valid_filenames_enc_mode(policy->filenames_encryption_mode)) { + printk(KERN_WARNING + "%s: Invalid filenames encryption mode %d\n", __func__, + policy->filenames_encryption_mode); return -EINVAL; } if (policy->flags & ~EXT4_POLICY_FLAGS_VALID) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 8452a5a405f4..1725a1538e87 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -589,8 +589,6 @@ enum { #define EXT4_ENCRYPTION_MODE_AES_256_GCM 2 #define EXT4_ENCRYPTION_MODE_AES_256_CBC 3 #define EXT4_ENCRYPTION_MODE_AES_256_CTS 4 -#define EXT4_ENCRYPTION_MODE_SPECK128_256_XTS 7 -#define EXT4_ENCRYPTION_MODE_SPECK128_256_CTS 8 #define EXT4_ENCRYPTION_MODE_AES_256_HEH 126 #include "ext4_crypto.h" @@ -2256,7 +2254,7 @@ int ext4_get_policy(struct inode *inode, /* crypto.c */ extern struct kmem_cache *ext4_crypt_info_cachep; -bool ext4_valid_enc_modes(uint32_t contents_mode, uint32_t filenames_mode); +bool ext4_valid_contents_enc_mode(uint32_t mode); uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size); extern struct workqueue_struct *ext4_read_workqueue; struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode, @@ -2287,6 +2285,7 @@ static inline int ext4_sb_has_crypto(struct super_block *sb) #endif /* crypto_fname.c */ +bool ext4_valid_filenames_enc_mode(uint32_t mode); u32 ext4_fname_crypto_round_up(u32 size, u32 blksize); unsigned ext4_fname_encrypted_size(struct inode *inode, u32 ilen); int ext4_fname_crypto_alloc_buffer(struct inode *inode, diff --git a/fs/ext4/ext4_crypto.h b/fs/ext4/ext4_crypto.h index f7ba3be9d7ac..e52637d969db 100644 --- a/fs/ext4/ext4_crypto.h +++ b/fs/ext4/ext4_crypto.h @@ -124,10 +124,6 @@ static inline int ext4_encryption_key_size(int mode) return EXT4_AES_256_CTS_KEY_SIZE; case EXT4_ENCRYPTION_MODE_AES_256_HEH: return EXT4_AES_256_HEH_KEY_SIZE; - case EXT4_ENCRYPTION_MODE_SPECK128_256_XTS: - return 64; - case EXT4_ENCRYPTION_MODE_SPECK128_256_CTS: - return 32; default: BUG(); } From 4518c70d5a4216fd68b4fde56f4f692d1ba91c0f Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Thu, 30 Aug 2018 21:33:31 +0800 Subject: [PATCH 013/189] f2fs: add additional sanity check in f2fs_acl_from_disk() Add additinal sanity check for irregular case(e.g. corruption). If size of extended attribution is smaller than size of acl header, then return -EINVAL. Signed-off-by: Chengguang Xu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/acl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 3f52efa0f94f..c65158a8bb06 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -53,6 +53,9 @@ static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size) struct f2fs_acl_entry *entry = (struct f2fs_acl_entry *)(hdr + 1); const char *end = value + size; + if (size < sizeof(struct f2fs_acl_header)) + return ERR_PTR(-EINVAL); + if (hdr->a_version != cpu_to_le32(F2FS_ACL_VERSION)) return ERR_PTR(-EINVAL); From ea89149780009de133f60752939a5bacfb69f61b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 4 Sep 2018 03:52:17 +0800 Subject: [PATCH 014/189] f2fs: fix to avoid NULL pointer dereference on se->discard_map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit https://bugzilla.kernel.org/show_bug.cgi?id=200951 These is a NULL pointer dereference issue reported in bugzilla: Hi, in the setup there is a SATA SSD connected to a SATA-to-USB bridge. The disc is "Samsung SSD 850 PRO 256G" which supports TRIM. There are four partitions: sda1: FAT /boot sda2: F2FS / sda3: F2FS /home sda4: F2FS The bridge is ASMT1153e which uses the "uas" driver. There is no TRIM pass-through, so, when mounting it reports: mounting with "discard" option, but the device does not support discard The USB host is USB3.0 and UASP capable. It is the one on RK3399. Given this everything works fine, except there is no TRIM support. In order to enable TRIM a new UDEV rule is added [1]: /etc/udev/rules.d/10-sata-bridge-trim.rules: ACTION=="add|change", ATTRS{idVendor}=="174c", ATTRS{idProduct}=="55aa", SUBSYSTEM=="scsi_disk", ATTR{provisioning_mode}="unmap" After reboot any F2FS write hangs forever and dmesg reports: Unable to handle kernel NULL pointer dereference Also tested on a x86_64 system: works fine even with TRIM enabled. same disc same bridge different usb host controller different cpu architecture not root filesystem Regards, Vicenç. [1] Post #5 in https://bbs.archlinux.org/viewtopic.php?id=236280 Unable to handle kernel NULL pointer dereference at virtual address 000000000000003e Mem abort info: ESR = 0x96000004 Exception class = DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000004 CM = 0, WnR = 0 user pgtable: 4k pages, 48-bit VAs, pgdp = 00000000626e3122 [000000000000003e] pgd=0000000000000000 Internal error: Oops: 96000004 [#1] SMP Modules linked in: overlay snd_soc_hdmi_codec rc_cec dw_hdmi_i2s_audio dw_hdmi_cec snd_soc_simple_card snd_soc_simple_card_utils snd_soc_rockchip_i2s rockchip_rga snd_soc_rockchip_pcm rockchipdrm videobuf2_dma_sg v4l2_mem2mem rtc_rk808 videobuf2_memops analogix_dp videobuf2_v4l2 videobuf2_common dw_hdmi dw_wdt cec rc_core videodev drm_kms_helper media drm rockchip_thermal rockchip_saradc realtek drm_panel_orientation_quirks syscopyarea sysfillrect sysimgblt fb_sys_fops dwmac_rk stmmac_platform stmmac pwm_bl squashfs loop crypto_user gpio_keys hid_kensington CPU: 5 PID: 957 Comm: nvim Not tainted 4.19.0-rc1-1-ARCH #1 Hardware name: Sapphire-RK3399 Board (DT) pstate: 00000005 (nzcv daif -PAN -UAO) pc : update_sit_entry+0x304/0x4b0 lr : update_sit_entry+0x108/0x4b0 sp : ffff00000ca13bd0 x29: ffff00000ca13bd0 x28: 000000000000003e x27: 0000000000000020 x26: 0000000000080000 x25: 0000000000000048 x24: ffff8000ebb85cf8 x23: 0000000000000253 x22: 00000000ffffffff x21: 00000000000535f2 x20: 00000000ffffffdf x19: ffff8000eb9e6800 x18: ffff8000eb9e6be8 x17: 0000000007ce6926 x16: 000000001c83ffa8 x15: 0000000000000000 x14: ffff8000f602df90 x13: 0000000000000006 x12: 0000000000000040 x11: 0000000000000228 x10: 0000000000000000 x9 : 0000000000000000 x8 : 0000000000000000 x7 : 00000000000535f2 x6 : ffff8000ebff3440 x5 : ffff8000ebff3440 x4 : ffff8000ebe3a6c8 x3 : 00000000ffffffff x2 : 0000000000000020 x1 : 0000000000000000 x0 : ffff8000eb9e5800 Process nvim (pid: 957, stack limit = 0x0000000063a78320) Call trace: update_sit_entry+0x304/0x4b0 f2fs_invalidate_blocks+0x98/0x140 truncate_node+0x90/0x400 f2fs_remove_inode_page+0xe8/0x340 f2fs_evict_inode+0x2b0/0x408 evict+0xe0/0x1e0 iput+0x160/0x260 do_unlinkat+0x214/0x298 __arm64_sys_unlinkat+0x3c/0x68 el0_svc_handler+0x94/0x118 el0_svc+0x8/0xc Code: f9400800 b9488400 36080140 f9400f01 (387c4820) ---[ end trace a0f21a307118c477 ]--- The reason is it is possible to enable discard flag on block queue via UDEV, but during mount, f2fs will initialize se->discard_map only if this flag is set, once the flag is set after mount, f2fs may dereference NULL pointer on se->discard_map. So this patch does below changes to fix this issue: - initialize and update se->discard_map all the time. - don't clear DISCARD option if device has no QUEUE_FLAG_DISCARD flag during mount. - don't issue small discard on zoned block device. - introduce some functions to enhance the readability. Signed-off-by: Chao Yu Tested-by: Vicente Bergas Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 3 +-- fs/f2fs/f2fs.h | 15 ++++++++--- fs/f2fs/file.c | 2 +- fs/f2fs/segment.c | 65 ++++++++++++++++++++--------------------------- fs/f2fs/super.c | 16 +++--------- 5 files changed, 46 insertions(+), 55 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 214a968962a1..ebe649d9793c 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -190,8 +190,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->base_mem += MAIN_SEGS(sbi) * sizeof(struct seg_entry); si->base_mem += f2fs_bitmap_size(MAIN_SEGS(sbi)); si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); - if (f2fs_discard_en(sbi)) - si->base_mem += SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); + si->base_mem += SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); si->base_mem += SIT_VBLOCK_MAP_SIZE; if (sbi->segs_per_sec > 1) si->base_mem += MAIN_SECS(sbi) * sizeof(struct sec_entry); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 787df98db916..bff413119ccb 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3484,11 +3484,20 @@ static inline int get_blkz_type(struct f2fs_sb_info *sbi, } #endif -static inline bool f2fs_discard_en(struct f2fs_sb_info *sbi) +static inline bool f2fs_hw_should_discard(struct f2fs_sb_info *sbi) { - struct request_queue *q = bdev_get_queue(sbi->sb->s_bdev); + return f2fs_sb_has_blkzoned(sbi->sb); +} - return blk_queue_discard(q) || f2fs_sb_has_blkzoned(sbi->sb); +static inline bool f2fs_hw_support_discard(struct f2fs_sb_info *sbi) +{ + return blk_queue_discard(bdev_get_queue(sbi->sb->s_bdev)); +} + +static inline bool f2fs_realtime_discard_enable(struct f2fs_sb_info *sbi) +{ + return (test_opt(sbi, DISCARD) && f2fs_hw_support_discard(sbi)) || + f2fs_hw_should_discard(sbi); } static inline void set_opt_mode(struct f2fs_sb_info *sbi, unsigned int mt) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 8e381b6385e3..09893856ca0d 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1978,7 +1978,7 @@ static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!blk_queue_discard(q)) + if (!f2fs_hw_support_discard(F2FS_SB(sb))) return -EOPNOTSUPP; if (copy_from_user(&range, (struct fstrim_range __user *)arg, diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index fdc17721e41e..17ae3567ef6a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1805,11 +1805,11 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, struct list_head *head = &SM_I(sbi)->dcc_info->entry_list; int i; - if (se->valid_blocks == max_blocks || !f2fs_discard_en(sbi)) + if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi)) return false; if (!force) { - if (!test_opt(sbi, DISCARD) || !se->valid_blocks || + if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks || SM_I(sbi)->dcc_info->nr_discards >= SM_I(sbi)->dcc_info->max_discards) return false; @@ -1915,7 +1915,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, dirty_i->nr_dirty[PRE]--; } - if (!test_opt(sbi, DISCARD)) + if (!f2fs_realtime_discard_enable(sbi)) continue; if (force && start >= cpc->trim_start && @@ -2105,8 +2105,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) del = 0; } - if (f2fs_discard_en(sbi) && - !f2fs_test_and_set_bit(offset, se->discard_map)) + if (!f2fs_test_and_set_bit(offset, se->discard_map)) sbi->discard_blks--; /* don't overwrite by SSR to keep node chain */ @@ -2134,8 +2133,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) del = 0; } - if (f2fs_discard_en(sbi) && - f2fs_test_and_clear_bit(offset, se->discard_map)) + if (f2fs_test_and_clear_bit(offset, se->discard_map)) sbi->discard_blks++; } if (!f2fs_test_bit(offset, se->ckpt_valid_map)) @@ -2751,7 +2749,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) * discard option. User configuration looks like using runtime discard * or periodic fstrim instead of it. */ - if (test_opt(sbi, DISCARD)) + if (f2fs_realtime_discard_enable(sbi)) goto out; start_block = START_BLOCK(sbi, start_segno); @@ -3843,13 +3841,11 @@ static int build_sit_info(struct f2fs_sb_info *sbi) return -ENOMEM; #endif - if (f2fs_discard_en(sbi)) { - sit_i->sentries[start].discard_map - = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, - GFP_KERNEL); - if (!sit_i->sentries[start].discard_map) - return -ENOMEM; - } + sit_i->sentries[start].discard_map + = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, + GFP_KERNEL); + if (!sit_i->sentries[start].discard_map) + return -ENOMEM; } sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); @@ -3997,18 +3993,16 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) total_node_blocks += se->valid_blocks; /* build discard map only one time */ - if (f2fs_discard_en(sbi)) { - if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { - memset(se->discard_map, 0xff, - SIT_VBLOCK_MAP_SIZE); - } else { - memcpy(se->discard_map, - se->cur_valid_map, - SIT_VBLOCK_MAP_SIZE); - sbi->discard_blks += - sbi->blocks_per_seg - - se->valid_blocks; - } + if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { + memset(se->discard_map, 0xff, + SIT_VBLOCK_MAP_SIZE); + } else { + memcpy(se->discard_map, + se->cur_valid_map, + SIT_VBLOCK_MAP_SIZE); + sbi->discard_blks += + sbi->blocks_per_seg - + se->valid_blocks; } if (sbi->segs_per_sec > 1) @@ -4046,16 +4040,13 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) if (IS_NODESEG(se->type)) total_node_blocks += se->valid_blocks; - if (f2fs_discard_en(sbi)) { - if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { - memset(se->discard_map, 0xff, - SIT_VBLOCK_MAP_SIZE); - } else { - memcpy(se->discard_map, se->cur_valid_map, - SIT_VBLOCK_MAP_SIZE); - sbi->discard_blks += old_valid_blocks; - sbi->discard_blks -= se->valid_blocks; - } + if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { + memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE); + } else { + memcpy(se->discard_map, se->cur_valid_map, + SIT_VBLOCK_MAP_SIZE); + sbi->discard_blks += old_valid_blocks; + sbi->discard_blks -= se->valid_blocks; } if (sbi->segs_per_sec > 1) { diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 5e329c022e74..c8034729a7d0 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -360,7 +360,6 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) static int parse_options(struct super_block *sb, char *options) { struct f2fs_sb_info *sbi = F2FS_SB(sb); - struct request_queue *q; substring_t args[MAX_OPT_ARGS]; char *p, *name; int arg = 0; @@ -415,14 +414,7 @@ static int parse_options(struct super_block *sb, char *options) return -EINVAL; break; case Opt_discard: - q = bdev_get_queue(sb->s_bdev); - if (blk_queue_discard(q)) { - set_opt(sbi, DISCARD); - } else if (!f2fs_sb_has_blkzoned(sb)) { - f2fs_msg(sb, KERN_WARNING, - "mounting with \"discard\" option, but " - "the device does not support discard"); - } + set_opt(sbi, DISCARD); break; case Opt_nodiscard: if (f2fs_sb_has_blkzoned(sb)) { @@ -1032,7 +1024,8 @@ static void f2fs_put_super(struct super_block *sb) /* be sure to wait for any on-going discard commands */ dropped = f2fs_wait_discard_bios(sbi); - if (f2fs_discard_en(sbi) && !sbi->discard_blks && !dropped) { + if ((f2fs_hw_support_discard(sbi) || f2fs_hw_should_discard(sbi)) && + !sbi->discard_blks && !dropped) { struct cp_control cpc = { .reason = CP_UMOUNT | CP_TRIMMED, }; @@ -1400,8 +1393,7 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, NOHEAP); sbi->sb->s_flags |= MS_LAZYTIME; set_opt(sbi, FLUSH_MERGE); - if (blk_queue_discard(bdev_get_queue(sbi->sb->s_bdev))) - set_opt(sbi, DISCARD); + set_opt(sbi, DISCARD); if (f2fs_sb_has_blkzoned(sbi->sb)) set_opt_mode(sbi, F2FS_MOUNT_LFS); else From 810579fccebf14472c7813381eecaad398231c82 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Fri, 31 Aug 2018 15:09:26 +0530 Subject: [PATCH 015/189] f2fs: fix unnecessary periodic wakeup of discard thread when dev is busy When dev is busy, discard thread wake up timeout can be aligned with the exact time that it needs to wait for dev to come out of busy. This helps to avoid unnecessary periodic wakeups and thus save some power. Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 17ae3567ef6a..cba6697a9ee2 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1644,6 +1644,8 @@ static int issue_discard_thread(void *data) struct discard_policy dpolicy; unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME; int issued; + unsigned long interval = sbi->interval_time[REQ_TIME] * HZ; + long delta; set_freezable(); @@ -1680,7 +1682,11 @@ static int issue_discard_thread(void *data) __wait_all_discard_cmd(sbi, &dpolicy); wait_ms = dpolicy.min_interval; } else if (issued == -1){ - wait_ms = dpolicy.mid_interval; + delta = (sbi->last_time[REQ_TIME] + interval) - jiffies; + if (delta > 0) + wait_ms = jiffies_to_msecs(delta); + else + wait_ms = dpolicy.mid_interval; } else { wait_ms = dpolicy.max_interval; } From 09206dd9ac2ab85a7f3e628de14b410137f68618 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 22 Aug 2018 17:17:47 +0800 Subject: [PATCH 016/189] Revert "f2fs: use printk_ratelimited for f2fs_msg" Don't limit printing log, so that we will not miss any key messages. This reverts commit a36c106dffb616250117efb1cab271c19a8f94ff. In addition, we use printk_ratelimited to avoid too many log prints. - error injection - discard submission failure Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 6 +++--- fs/f2fs/segment.c | 6 +++--- fs/f2fs/super.c | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index bff413119ccb..631f7a1348ab 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1371,9 +1371,9 @@ struct f2fs_sb_info { }; #ifdef CONFIG_F2FS_FAULT_INJECTION -#define f2fs_show_injection_info(type) \ - printk("%sF2FS-fs : inject %s in %s of %pF\n", \ - KERN_INFO, f2fs_fault_name[type], \ +#define f2fs_show_injection_info(type) \ + printk_ratelimited("%sF2FS-fs : inject %s in %s of %pF\n", \ + KERN_INFO, f2fs_fault_name[type], \ __func__, __builtin_return_address(0)) static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) { diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index cba6697a9ee2..8eb01d4c0ce3 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -905,9 +905,9 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, dc->error = 0; if (dc->error) - f2fs_msg(sbi->sb, KERN_INFO, - "Issue discard(%u, %u, %u) failed, ret: %d", - dc->lstart, dc->start, dc->len, dc->error); + printk_ratelimited( + "%sF2FS-fs: Issue discard(%u, %u, %u) failed, ret: %d", + KERN_INFO, dc->lstart, dc->start, dc->len, dc->error); __detach_discard_cmd(dcc, dc); } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index c8034729a7d0..acfe792dd523 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -207,7 +207,7 @@ void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...) va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; - printk_ratelimited("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf); + printk("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf); va_end(args); } From 9175c6044f0175531f87046edad266e11eed088e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 22 Aug 2018 21:18:00 -0700 Subject: [PATCH 017/189] f2fs: avoid wrong decrypted data from disk 1. Create a file in an encrypted directory 2. Do GC & drop caches 3. Read stale data before its bio for metapage was not issued yet Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 18 ++++++++++-------- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 3 +-- fs/f2fs/segment.c | 6 +++++- 4 files changed, 17 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 267d907104a1..fd85f9ede1a8 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -563,9 +563,6 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, ctx->bio = bio; ctx->enabled_steps = post_read_steps; bio->bi_private = ctx; - - /* wait the page to be moved by cleaning */ - f2fs_wait_on_block_writeback(sbi, blkaddr); } return bio; @@ -580,6 +577,9 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, if (IS_ERR(bio)) return PTR_ERR(bio); + /* wait for GCed page writeback via META_MAPPING */ + f2fs_wait_on_block_writeback(inode, blkaddr); + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { bio_put(bio); return -EFAULT; @@ -1555,6 +1555,12 @@ submit_and_realloc: } } + /* + * If the page is under writeback, we need to wait for + * its completion to see the correct decrypted data. + */ + f2fs_wait_on_block_writeback(inode, block_nr); + if (bio_add_page(bio, page, blocksize, 0) < blocksize) goto submit_and_realloc; @@ -1622,7 +1628,7 @@ static int encrypt_one_page(struct f2fs_io_info *fio) return 0; /* wait for GCed page writeback via META_MAPPING */ - f2fs_wait_on_block_writeback(fio->sbi, fio->old_blkaddr); + f2fs_wait_on_block_writeback(inode, fio->old_blkaddr); retry_encrypt: fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page, @@ -2379,10 +2385,6 @@ repeat: f2fs_wait_on_page_writeback(page, DATA, false); - /* wait for GCed page writeback via META_MAPPING */ - if (f2fs_post_read_required(inode)) - f2fs_wait_on_block_writeback(sbi, blkaddr); - if (len == PAGE_SIZE || PageUptodate(page)) return 0; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 631f7a1348ab..ead9916582df 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3027,7 +3027,7 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, struct f2fs_io_info *fio, bool add_list); void f2fs_wait_on_page_writeback(struct page *page, enum page_type type, bool ordered); -void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr); +void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr); void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk); void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk); int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 09893856ca0d..c774e32d5ebb 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -115,8 +115,7 @@ mapped: f2fs_wait_on_page_writeback(page, DATA, false); /* wait for GCed page writeback via META_MAPPING */ - if (f2fs_post_read_required(inode)) - f2fs_wait_on_block_writeback(sbi, dn.data_blkaddr); + f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); out_sem: up_read(&F2FS_I(inode)->i_mmap_sem); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8eb01d4c0ce3..20a68557e4f5 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3276,10 +3276,14 @@ void f2fs_wait_on_page_writeback(struct page *page, } } -void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr) +void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct page *cpage; + if (!f2fs_post_read_required(inode)) + return; + if (!is_valid_data_blkaddr(sbi, blkaddr)) return; From 0da4a7250d0dacb7b7ea022194da6d9c6d2eb4dc Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 6 Sep 2018 11:40:12 -0700 Subject: [PATCH 018/189] f2fs: submit bio after shutdown Sometimes, some merged IOs could get a chance to be submitted, resulting in system hang in shutdown test. This issues IOs all the time after shutdown. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index fd85f9ede1a8..ae22394a969c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -531,6 +531,8 @@ skip: if (fio->in_list) goto next; out: + if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)) + __submit_merged_bio(io); up_write(&io->io_rwsem); } From 5ad8ed96fd822f9f301628ab2908a7873753ea64 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Tue, 26 Jun 2018 13:12:43 +0800 Subject: [PATCH 019/189] f2fs: report error if quota off error during umount Now, we depend on fsck to ensure quota file data is ok, so we scan whole partition if checkpoint without umount flag. It's same for quota off error case, which may make quota file data inconsistent. generic/019 reports below error: __quota_error: 1160 callbacks suppressed Quota error (device zram1): write_blk: dquota write failed Quota error (device zram1): qtree_write_dquot: Error -28 occurred while creating quota Quota error (device zram1): write_blk: dquota write failed Quota error (device zram1): qtree_write_dquot: Error -28 occurred while creating quota Quota error (device zram1): write_blk: dquota write failed Quota error (device zram1): qtree_write_dquot: Error -28 occurred while creating quota Quota error (device zram1): write_blk: dquota write failed Quota error (device zram1): qtree_write_dquot: Error -28 occurred while creating quota Quota error (device zram1): write_blk: dquota write failed Quota error (device zram1): qtree_write_dquot: Error -28 occurred while creating quota VFS: Busy inodes after unmount of zram1. Self-destruct in 5 seconds. Have a nice day... If we failed in below path due to fail to write dquot block, we will miss to release quota inode, fix it. - f2fs_put_super - f2fs_quota_off_umount - f2fs_quota_off - f2fs_quota_sync <-- failed - dquot_quota_off <-- missed to call Signed-off-by: Yunlei He Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index acfe792dd523..68290ea51660 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1845,7 +1845,9 @@ static int f2fs_quota_off(struct super_block *sb, int type) if (!inode || !igrab(inode)) return dquot_quota_off(sb, type); - f2fs_quota_sync(sb, type); + err = f2fs_quota_sync(sb, type); + if (err) + goto out_put; err = dquot_quota_off(sb, type); if (err || f2fs_sb_has_quota_ino(sb)) @@ -1864,9 +1866,20 @@ out_put: void f2fs_quota_off_umount(struct super_block *sb) { int type; + int err; - for (type = 0; type < MAXQUOTAS; type++) - f2fs_quota_off(sb, type); + for (type = 0; type < MAXQUOTAS; type++) { + err = f2fs_quota_off(sb, type); + if (err) { + int ret = dquot_quota_off(sb, type); + + f2fs_msg(sb, KERN_ERR, + "Fail to turn off disk quota " + "(type: %d, err: %d, ret:%d), Please " + "run fsck to fix it.", type, err, ret); + set_sbi_flag(F2FS_SB(sb), SBI_NEED_FSCK); + } + } } #if 0 From 4f36be46ade61d0d32aefd777212adb6770cee10 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 22 Aug 2018 17:11:05 +0800 Subject: [PATCH 020/189] f2fs: fix to flush all dirty inodes recovered in readonly fs generic/417 reported as blow: ------------[ cut here ]------------ kernel BUG at /home/yuchao/git/devf2fs/inode.c:695! invalid opcode: 0000 [#1] PREEMPT SMP CPU: 1 PID: 21697 Comm: umount Tainted: G W O 4.18.0-rc2+ #39 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 EIP: f2fs_evict_inode+0x556/0x580 [f2fs] Call Trace: ? _raw_spin_unlock+0x2c/0x50 evict+0xa8/0x170 dispose_list+0x34/0x40 evict_inodes+0x118/0x120 generic_shutdown_super+0x41/0x100 ? rcu_read_lock_sched_held+0x97/0xa0 kill_block_super+0x22/0x50 kill_f2fs_super+0x6f/0x80 [f2fs] deactivate_locked_super+0x3d/0x70 deactivate_super+0x40/0x60 cleanup_mnt+0x39/0x70 __cleanup_mnt+0x10/0x20 task_work_run+0x81/0xa0 exit_to_usermode_loop+0x59/0xa7 do_fast_syscall_32+0x1f5/0x22c entry_SYSENTER_32+0x53/0x86 EIP: f2fs_evict_inode+0x556/0x580 [f2fs] It can simply reproduced with scripts: Enable quota feature during mkfs. Testcase1: 1. mkfs.f2fs /dev/zram0 2. mount -t f2fs /dev/zram0 /mnt/f2fs 3. xfs_io -f /mnt/f2fs/file -c "pwrite 0 4k" -c "fsync" 4. godown /mnt/f2fs 5. umount /mnt/f2fs 6. mount -t f2fs -o ro /dev/zram0 /mnt/f2fs 7. umount /mnt/f2fs Testcase2: 1. mkfs.f2fs /dev/zram0 2. mount -t f2fs /dev/zram0 /mnt/f2fs 3. touch /mnt/f2fs/file 4. create process[pid = x] do: a) open /mnt/f2fs/file; b) unlink /mnt/f2fs/file 5. godown -f /mnt/f2fs 6. kill process[pid = x] 7. umount /mnt/f2fs 8. mount -t f2fs -o ro /dev/zram0 /mnt/f2fs 9. umount /mnt/f2fs The reason is: during recovery, i_{c,m}time of inode will be updated, then the inode can be set dirty w/o being tracked in sbi->inode_list[DIRTY_META] global list, so later write_checkpoint will not flush such dirty inode into node page. Once umount is called, sync_filesystem() in generic_shutdown_super() will skip syncng dirty inodes due to sb_rdonly check, leaving dirty inodes there. To solve this issue, during umount, add remove SB_RDONLY flag in sb->s_flags, to make sure sync_filesystem() will not be skipped. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 ++ fs/f2fs/f2fs.h | 1 + fs/f2fs/recovery.c | 14 +++++++++----- fs/f2fs/super.c | 3 +++ 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index f7cdd3b536e3..646d3497bde7 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -697,6 +697,8 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi) /* clear Orphan Flag */ clear_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG); out: + set_sbi_flag(sbi, SBI_IS_RECOVERED); + #ifdef CONFIG_QUOTA /* Turn quotas off */ if (quota_enabled) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ead9916582df..2ace2b36a2d3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1153,6 +1153,7 @@ enum { SBI_NEED_SB_WRITE, /* need to recover superblock */ SBI_NEED_CP, /* need to checkpoint */ SBI_IS_SHUTDOWN, /* shutdown by ioctl */ + SBI_IS_RECOVERED, /* recovered orphan/data */ }; enum { diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 501bb0fdda1b..e34ca1686e3b 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -697,11 +697,15 @@ skip: /* let's drop all the directory inodes for clean checkpoint */ destroy_fsync_dnodes(&dir_list); - if (!err && need_writecp) { - struct cp_control cpc = { - .reason = CP_RECOVERY, - }; - err = f2fs_write_checkpoint(sbi, &cpc); + if (need_writecp) { + set_sbi_flag(sbi, SBI_IS_RECOVERED); + + if (!err) { + struct cp_control cpc = { + .reason = CP_RECOVERY, + }; + err = f2fs_write_checkpoint(sbi, &cpc); + } } kmem_cache_destroy(fsync_entry_slab); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 68290ea51660..51888dcabb35 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3187,6 +3187,9 @@ static void kill_f2fs_super(struct super_block *sb) }; f2fs_write_checkpoint(sbi, &cpc); } + + if (is_sbi_flag_set(sbi, SBI_IS_RECOVERED) && f2fs_readonly(sb)) + sb->s_flags &= ~MS_RDONLY; } kill_block_super(sb); } From 41e7c940b30d135a5424ae1fad018945e0e39008 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Fri, 31 Aug 2018 22:33:50 +0800 Subject: [PATCH 021/189] f2fs: cache NULL when both default_acl and acl are NULL default_acl and acl of newly created inode will be initiated as ACL_NOT_CACHED in vfs function inode_init_always() and later will be updated by calling xxx_init_acl() in specific filesystems. Howerver, when default_acl and acl are NULL then they keep the value of ACL_NOT_CACHED, this patch tries to cache NULL for acl/default_acl in this case. Signed-off-by: Chengguang Xu Acked-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/acl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index c65158a8bb06..409537cbf4d3 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -400,12 +400,16 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, error = __f2fs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl, ipage); posix_acl_release(default_acl); + } else { + inode->i_default_acl = NULL; } if (acl) { if (!error) error = __f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl, ipage); posix_acl_release(acl); + } else { + inode->i_acl = NULL; } return error; From e95ef46b0d135eba8e15f9aee140f71bd83c8714 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 5 Sep 2018 14:54:01 +0800 Subject: [PATCH 022/189] f2fs: fix memory leak of write_io in fill_super() It needs to release memory allocated for sbi->write_io in error path, otherwise, it will cause memory leak. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 51888dcabb35..c96aa5f3ee80 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2879,7 +2879,7 @@ try_onemore: GFP_KERNEL); if (!sbi->write_io[i]) { err = -ENOMEM; - goto free_options; + goto free_bio_info; } for (j = HOT; j < n; j++) { From 9c051754e9eaf69424f1c0df2c61a9b1a5d0f7c7 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 5 Sep 2018 14:54:02 +0800 Subject: [PATCH 023/189] f2fs: fix memory leak of percpu counter in fill_super() In fill_super -> init_percpu_info, we should destroy percpu counter in error path, otherwise memory allcoated for percpu counter will leak. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index c96aa5f3ee80..14ac38a1dbe7 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2462,8 +2462,12 @@ static int init_percpu_info(struct f2fs_sb_info *sbi) if (err) return err; - return percpu_counter_init(&sbi->total_valid_inode_count, 0, + err = percpu_counter_init(&sbi->total_valid_inode_count, 0, GFP_KERNEL); + if (err) + percpu_counter_destroy(&sbi->alloc_valid_block_count); + + return err; } #ifdef CONFIG_BLK_DEV_ZONED From 99255f120022d82457d8d71b526a92b7f37f4dd2 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 6 Sep 2018 20:34:12 +0800 Subject: [PATCH 024/189] f2fs: fix to do sanity check with current segment number https://bugzilla.kernel.org/show_bug.cgi?id=200219 Reproduction way: - mount image - run poc code - umount image F2FS-fs (loop1): Bitmap was wrongly set, blk:15364 ------------[ cut here ]------------ kernel BUG at /home/yuchao/git/devf2fs/segment.c:2061! invalid opcode: 0000 [#1] PREEMPT SMP CPU: 2 PID: 17686 Comm: umount Tainted: G W O 4.18.0-rc2+ #39 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 EIP: update_sit_entry+0x459/0x4e0 [f2fs] Code: e8 1c b5 fd ff 0f 0b 0f 0b 8b 45 e4 c7 44 24 08 9c 7a 6c f8 c7 44 24 04 bc 4a 6c f8 89 44 24 0c 8b 06 89 04 24 e8 f7 b4 fd ff <0f> 0b 8b 45 e4 0f b6 d2 89 54 24 10 c7 44 24 08 60 7a 6c f8 c7 44 EAX: 00000032 EBX: 000000f8 ECX: 00000002 EDX: 00000001 ESI: d7177000 EDI: f520fe68 EBP: d6477c6c ESP: d6477c34 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 EFLAGS: 00010282 CR0: 80050033 CR2: b7fbe000 CR3: 2a99b3c0 CR4: 000406f0 Call Trace: f2fs_allocate_data_block+0x124/0x580 [f2fs] do_write_page+0x78/0x150 [f2fs] f2fs_do_write_node_page+0x25/0xa0 [f2fs] __write_node_page+0x2bf/0x550 [f2fs] f2fs_sync_node_pages+0x60e/0x6d0 [f2fs] ? sync_inode_metadata+0x2f/0x40 ? f2fs_write_checkpoint+0x28f/0x7d0 [f2fs] ? up_write+0x1e/0x80 f2fs_write_checkpoint+0x2a9/0x7d0 [f2fs] ? mark_held_locks+0x5d/0x80 ? _raw_spin_unlock_irq+0x27/0x50 kill_f2fs_super+0x68/0x90 [f2fs] deactivate_locked_super+0x3d/0x70 deactivate_super+0x40/0x60 cleanup_mnt+0x39/0x70 __cleanup_mnt+0x10/0x20 task_work_run+0x81/0xa0 exit_to_usermode_loop+0x59/0xa7 do_fast_syscall_32+0x1f5/0x22c entry_SYSENTER_32+0x53/0x86 EIP: 0xb7f95c51 Code: c1 1e f7 ff ff 89 e5 8b 55 08 85 d2 8b 81 64 cd ff ff 74 02 89 02 5d c3 8b 0c 24 c3 8b 1c 24 c3 90 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 90 90 90 8d 76 00 58 b8 77 00 00 00 cd 80 90 8d 76 EAX: 00000000 EBX: 0871ab90 ECX: bfb2cd00 EDX: 00000000 ESI: 00000000 EDI: 0871ab90 EBP: 0871ab90 ESP: bfb2cd7c DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b EFLAGS: 00000246 Modules linked in: f2fs(O) crc32_generic bnep rfcomm bluetooth ecdh_generic snd_intel8x0 snd_ac97_codec ac97_bus snd_pcm snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq pcbc joydev aesni_intel snd_seq_device aes_i586 snd_timer crypto_simd snd cryptd soundcore mac_hid serio_raw video i2c_piix4 parport_pc ppdev lp parport hid_generic psmouse usbhid hid e1000 [last unloaded: f2fs] ---[ end trace d423f83982cfcdc5 ]--- The reason is, different log headers using the same segment, once one log's next block address is used by another log, it will cause panic as above. Main area: 24 segs, 24 secs 24 zones - COLD data: 0, 0, 0 - WARM data: 1, 1, 1 - HOT data: 20, 20, 20 - Dir dnode: 22, 22, 22 - File dnode: 22, 22, 22 - Indir nodes: 21, 21, 21 So this patch adds sanity check to detect such condition to avoid this issue. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 14ac38a1dbe7..2d50b4f9f1d2 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2329,7 +2329,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) unsigned int segment_count_main; unsigned int cp_pack_start_sum, cp_payload; block_t user_block_count; - int i; + int i, j; total = le32_to_cpu(raw_super->segment_count); fsmeta = le32_to_cpu(raw_super->segment_count_ckpt); @@ -2370,11 +2370,43 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) if (le32_to_cpu(ckpt->cur_node_segno[i]) >= main_segs || le16_to_cpu(ckpt->cur_node_blkoff[i]) >= blocks_per_seg) return 1; + for (j = i + 1; j < NR_CURSEG_NODE_TYPE; j++) { + if (le32_to_cpu(ckpt->cur_node_segno[i]) == + le32_to_cpu(ckpt->cur_node_segno[j])) { + f2fs_msg(sbi->sb, KERN_ERR, + "Node segment (%u, %u) has the same " + "segno: %u", i, j, + le32_to_cpu(ckpt->cur_node_segno[i])); + return 1; + } + } } for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) { if (le32_to_cpu(ckpt->cur_data_segno[i]) >= main_segs || le16_to_cpu(ckpt->cur_data_blkoff[i]) >= blocks_per_seg) return 1; + for (j = i + 1; j < NR_CURSEG_DATA_TYPE; j++) { + if (le32_to_cpu(ckpt->cur_data_segno[i]) == + le32_to_cpu(ckpt->cur_data_segno[j])) { + f2fs_msg(sbi->sb, KERN_ERR, + "Data segment (%u, %u) has the same " + "segno: %u", i, j, + le32_to_cpu(ckpt->cur_data_segno[i])); + return 1; + } + } + } + for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) { + for (j = i; j < NR_CURSEG_DATA_TYPE; j++) { + if (le32_to_cpu(ckpt->cur_node_segno[i]) == + le32_to_cpu(ckpt->cur_data_segno[j])) { + f2fs_msg(sbi->sb, KERN_ERR, + "Data segment (%u) and Data segment (%u)" + " has the same segno: %u", i, j, + le32_to_cpu(ckpt->cur_node_segno[i])); + return 1; + } + } } sit_bitmap_size = le32_to_cpu(ckpt->sit_ver_bitmap_bytesize); From cdbab19b219f61f218b62c072cacf460c569e234 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 7 Sep 2018 19:49:07 +0800 Subject: [PATCH 025/189] f2fs: plug readahead IO in readdir() Add a plug to merge readahead IO in readdir(), expecting it can reduce bio count before submitting to block layer. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 086639556705..2f2c29b6f5f9 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -784,9 +784,15 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, struct f2fs_dir_entry *de = NULL; struct fscrypt_str de_name = FSTR_INIT(NULL, 0); struct f2fs_sb_info *sbi = F2FS_I_SB(d->inode); + struct blk_plug plug; + bool readdir_ra = sbi->readdir_ra == 1; + int err = 0; bit_pos = ((unsigned long)ctx->pos % d->max); + if (readdir_ra) + blk_start_plug(&plug); + while (bit_pos < d->max) { bit_pos = find_next_bit_le(d->bitmap, d->max, bit_pos); if (bit_pos >= d->max) @@ -806,29 +812,33 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, if (f2fs_encrypted_inode(d->inode)) { int save_len = fstr->len; - int err; err = fscrypt_fname_disk_to_usr(d->inode, (u32)de->hash_code, 0, &de_name, fstr); if (err) - return err; + goto out; de_name = *fstr; fstr->len = save_len; } if (!dir_emit(ctx, de_name.name, de_name.len, - le32_to_cpu(de->ino), d_type)) - return 1; + le32_to_cpu(de->ino), d_type)) { + err = 1; + goto out; + } - if (sbi->readdir_ra == 1) + if (readdir_ra) f2fs_ra_node_page(sbi, le32_to_cpu(de->ino)); bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); ctx->pos = start_pos + bit_pos; } - return 0; +out: + if (readdir_ra) + blk_finish_plug(&plug); + return err; } static int f2fs_readdir(struct file *file, struct dir_context *ctx) From 3a365d71681a1b389337efefab99f3ae30061ab5 Mon Sep 17 00:00:00 2001 From: Zhikang Zhang Date: Mon, 10 Sep 2018 16:18:25 +0800 Subject: [PATCH 026/189] f2fs: avoid sleeping under spin_lock In the call trace below, we might sleep in function dput(). So in order to avoid sleeping under spin_lock, we remove f2fs_mark_inode_dirty_sync from __try_update_largest_extent && __drop_largest_extent. BUG: sleeping function called from invalid context at fs/dcache.c:796 Call trace: dump_backtrace+0x0/0x3f4 show_stack+0x24/0x30 dump_stack+0xe0/0x138 ___might_sleep+0x2a8/0x2c8 __might_sleep+0x78/0x10c dput+0x7c/0x750 block_dump___mark_inode_dirty+0x120/0x17c __mark_inode_dirty+0x344/0x11f0 f2fs_mark_inode_dirty_sync+0x40/0x50 __insert_extent_tree+0x2e0/0x2f4 f2fs_update_extent_tree_range+0xcf4/0xde8 f2fs_update_extent_cache+0x114/0x12c f2fs_update_data_blkaddr+0x40/0x50 write_data_page+0x150/0x314 do_write_data_page+0x648/0x2318 __write_data_page+0xdb4/0x1640 f2fs_write_cache_pages+0x768/0xafc __f2fs_write_data_pages+0x590/0x1218 f2fs_write_data_pages+0x64/0x74 do_writepages+0x74/0xe4 __writeback_single_inode+0xdc/0x15f0 writeback_sb_inodes+0x574/0xc98 __writeback_inodes_wb+0x190/0x204 wb_writeback+0x730/0xf14 wb_check_old_data_flush+0x1bc/0x1c8 wb_workfn+0x554/0xf74 process_one_work+0x440/0x118c worker_thread+0xac/0x974 kthread+0x1a0/0x1c8 ret_from_fork+0x10/0x1c Signed-off-by: Zhikang Zhang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 51 ++++++++++++++++++++++++++---------------- fs/f2fs/f2fs.h | 7 +++--- 2 files changed, 36 insertions(+), 22 deletions(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 231b77ef5a53..a70cd2580eae 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -308,14 +308,13 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi, return count - atomic_read(&et->node_cnt); } -static void __drop_largest_extent(struct inode *inode, +static void __drop_largest_extent(struct extent_tree *et, pgoff_t fofs, unsigned int len) { - struct extent_info *largest = &F2FS_I(inode)->extent_tree->largest; - - if (fofs < largest->fofs + largest->len && fofs + len > largest->fofs) { - largest->len = 0; - f2fs_mark_inode_dirty_sync(inode, true); + if (fofs < et->largest.fofs + et->largest.len && + fofs + len > et->largest.fofs) { + et->largest.len = 0; + et->largest_updated = true; } } @@ -416,12 +415,11 @@ out: return ret; } -static struct extent_node *__try_merge_extent_node(struct inode *inode, +static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi, struct extent_tree *et, struct extent_info *ei, struct extent_node *prev_ex, struct extent_node *next_ex) { - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_node *en = NULL; if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei)) { @@ -443,7 +441,7 @@ static struct extent_node *__try_merge_extent_node(struct inode *inode, if (!en) return NULL; - __try_update_largest_extent(inode, et, en); + __try_update_largest_extent(et, en); spin_lock(&sbi->extent_lock); if (!list_empty(&en->list)) { @@ -454,12 +452,11 @@ static struct extent_node *__try_merge_extent_node(struct inode *inode, return en; } -static struct extent_node *__insert_extent_tree(struct inode *inode, +static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi, struct extent_tree *et, struct extent_info *ei, struct rb_node **insert_p, struct rb_node *insert_parent) { - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct rb_node **p; struct rb_node *parent = NULL; struct extent_node *en = NULL; @@ -476,7 +473,7 @@ do_insert: if (!en) return NULL; - __try_update_largest_extent(inode, et, en); + __try_update_largest_extent(et, en); /* update in global extent list */ spin_lock(&sbi->extent_lock); @@ -497,6 +494,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode, struct rb_node **insert_p = NULL, *insert_parent = NULL; unsigned int end = fofs + len; unsigned int pos = (unsigned int)fofs; + bool updated = false; if (!et) return; @@ -517,7 +515,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode, * drop largest extent before lookup, in case it's already * been shrunk from extent tree */ - __drop_largest_extent(inode, fofs, len); + __drop_largest_extent(et, fofs, len); /* 1. lookup first extent node in range [fofs, fofs + len - 1] */ en = (struct extent_node *)f2fs_lookup_rb_tree_ret(&et->root, @@ -550,7 +548,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode, set_extent_info(&ei, end, end - dei.fofs + dei.blk, org_end - end); - en1 = __insert_extent_tree(inode, et, &ei, + en1 = __insert_extent_tree(sbi, et, &ei, NULL, NULL); next_en = en1; } else { @@ -570,7 +568,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode, } if (parts) - __try_update_largest_extent(inode, et, en); + __try_update_largest_extent(et, en); else __release_extent_node(sbi, et, en); @@ -590,15 +588,16 @@ static void f2fs_update_extent_tree_range(struct inode *inode, if (blkaddr) { set_extent_info(&ei, fofs, blkaddr, len); - if (!__try_merge_extent_node(inode, et, &ei, prev_en, next_en)) - __insert_extent_tree(inode, et, &ei, + if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en)) + __insert_extent_tree(sbi, et, &ei, insert_p, insert_parent); /* give up extent_cache, if split and small updates happen */ if (dei.len >= 1 && prev.len < F2FS_MIN_EXTENT_LEN && et->largest.len < F2FS_MIN_EXTENT_LEN) { - __drop_largest_extent(inode, 0, UINT_MAX); + et->largest.len = 0; + et->largest_updated = true; set_inode_flag(inode, FI_NO_EXTENT); } } @@ -606,7 +605,15 @@ static void f2fs_update_extent_tree_range(struct inode *inode, if (is_inode_flag_set(inode, FI_NO_EXTENT)) __free_extent_tree(sbi, et); + if (et->largest_updated) { + et->largest_updated = false; + updated = true; + } + write_unlock(&et->lock); + + if (updated) + f2fs_mark_inode_dirty_sync(inode, true); } unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) @@ -705,6 +712,7 @@ void f2fs_drop_extent_tree(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_tree *et = F2FS_I(inode)->extent_tree; + bool updated = false; if (!f2fs_may_extent_tree(inode)) return; @@ -713,8 +721,13 @@ void f2fs_drop_extent_tree(struct inode *inode) write_lock(&et->lock); __free_extent_tree(sbi, et); - __drop_largest_extent(inode, 0, UINT_MAX); + if (et->largest.len) { + et->largest.len = 0; + updated = true; + } write_unlock(&et->lock); + if (updated) + f2fs_mark_inode_dirty_sync(inode, true); } void f2fs_destroy_extent_tree(struct inode *inode) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2ace2b36a2d3..c533a0fd357b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -637,6 +637,7 @@ struct extent_tree { struct list_head list; /* to be used by sbi->zombie_list */ rwlock_t lock; /* protect extent info rb-tree */ atomic_t node_cnt; /* # of extent node in rb-tree*/ + bool largest_updated; /* largest extent updated */ }; /* @@ -819,12 +820,12 @@ static inline bool __is_front_mergeable(struct extent_info *cur, } extern void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync); -static inline void __try_update_largest_extent(struct inode *inode, - struct extent_tree *et, struct extent_node *en) +static inline void __try_update_largest_extent(struct extent_tree *et, + struct extent_node *en) { if (en->ei.len > et->largest.len) { et->largest = en->ei; - f2fs_mark_inode_dirty_sync(inode, true); + et->largest_updated = true; } } From c4503a88feddedb22232ad48fc15698f4bd087b8 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Wed, 12 Sep 2018 13:32:52 +0800 Subject: [PATCH 027/189] f2fs: surround fault_injection related option parsing using CONFIG_F2FS_FAULT_INJECTION It's a little bit strange when fault_injection related options fail with -EINVAL which were already disabled from config, so surround all fault_injection related option parsing code using CONFIG_F2FS_FAULT_INJECTION. Meanwhile, slightly change warning message to keep consistency with option POSIX_ACL and FS_XATTR. Signed-off-by: Chengguang Xu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 2d50b4f9f1d2..cbc485b5967f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -594,28 +594,31 @@ static int parse_options(struct super_block *sb, char *options) } F2FS_OPTION(sbi).write_io_size_bits = arg; break; +#ifdef CONFIG_F2FS_FAULT_INJECTION case Opt_fault_injection: if (args->from && match_int(args, &arg)) return -EINVAL; -#ifdef CONFIG_F2FS_FAULT_INJECTION f2fs_build_fault_attr(sbi, arg, F2FS_ALL_FAULT_TYPE); set_opt(sbi, FAULT_INJECTION); -#else - f2fs_msg(sb, KERN_INFO, - "FAULT_INJECTION was not selected"); -#endif break; + case Opt_fault_type: if (args->from && match_int(args, &arg)) return -EINVAL; -#ifdef CONFIG_F2FS_FAULT_INJECTION f2fs_build_fault_attr(sbi, 0, arg); set_opt(sbi, FAULT_INJECTION); -#else - f2fs_msg(sb, KERN_INFO, - "FAULT_INJECTION was not selected"); -#endif break; +#else + case Opt_fault_injection: + f2fs_msg(sb, KERN_INFO, + "fault_injection options not supported"); + break; + + case Opt_fault_type: + f2fs_msg(sb, KERN_INFO, + "fault_type options not supported"); + break; +#endif case Opt_lazytime: sb->s_flags |= MS_LAZYTIME; break; From 5d020af76e6de17167293ded5b1b37bd766228f3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 12 Sep 2018 09:16:07 +0800 Subject: [PATCH 028/189] f2fs: add SPDX license identifiers Remove the verbose license text from f2fs files and replace them with SPDX tags. This does not change the license of any of the code. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/acl.c | 5 +---- fs/f2fs/acl.h | 5 +---- fs/f2fs/checkpoint.c | 5 +---- fs/f2fs/data.c | 5 +---- fs/f2fs/debug.c | 5 +---- fs/f2fs/dir.c | 5 +---- fs/f2fs/extent_cache.c | 5 +---- fs/f2fs/f2fs.h | 5 +---- fs/f2fs/file.c | 5 +---- fs/f2fs/gc.c | 5 +---- fs/f2fs/gc.h | 5 +---- fs/f2fs/hash.c | 5 +---- fs/f2fs/inline.c | 4 +--- fs/f2fs/inode.c | 5 +---- fs/f2fs/namei.c | 5 +---- fs/f2fs/node.c | 5 +---- fs/f2fs/node.h | 5 +---- fs/f2fs/recovery.c | 5 +---- fs/f2fs/segment.c | 5 +---- fs/f2fs/segment.h | 5 +---- fs/f2fs/shrinker.c | 5 +---- fs/f2fs/super.c | 5 +---- fs/f2fs/sysfs.c | 5 +---- fs/f2fs/trace.c | 5 +---- fs/f2fs/trace.h | 5 +---- fs/f2fs/xattr.c | 5 +---- fs/f2fs/xattr.h | 5 +---- include/linux/f2fs_fs.h | 5 +---- 28 files changed, 28 insertions(+), 111 deletions(-) diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 409537cbf4d3..f82f916e1f2c 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/acl.c * @@ -7,10 +8,6 @@ * Portions of this code from linux/fs/ext2/acl.c * * Copyright (C) 2001-2003 Andreas Gruenbacher, - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include "f2fs.h" diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h index 2c685185c24d..b96823c59b15 100644 --- a/fs/f2fs/acl.h +++ b/fs/f2fs/acl.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/acl.h * @@ -7,10 +8,6 @@ * Portions of this code from linux/fs/ext2/acl.h * * Copyright (C) 2001-2003 Andreas Gruenbacher, - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef __F2FS_ACL_H__ #define __F2FS_ACL_H__ diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 646d3497bde7..5a16a4ddbc01 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/checkpoint.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ae22394a969c..d95a8fa00e44 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/data.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index ebe649d9793c..d3c402183e3c 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * f2fs debugging statistics * @@ -5,10 +6,6 @@ * http://www.samsung.com/ * Copyright (c) 2012 Linux Foundation * Copyright (c) 2012 Greg Kroah-Hartman - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 2f2c29b6f5f9..a3302e2888dc 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/dir.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index a70cd2580eae..904ad7ba5a45 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * f2fs extent cache support * @@ -5,10 +6,6 @@ * Copyright (c) 2015 Samsung Electronics * Authors: Jaegeuk Kim * Chao Yu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c533a0fd357b..c96766c267fb 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/f2fs.h * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef _LINUX_F2FS_H #define _LINUX_F2FS_H diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index c774e32d5ebb..ce0b32031294 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/file.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index ada8b8056cd0..7e3f323254ef 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/gc.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index c8619e408009..bbac9d3787bd 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/gc.h * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #define GC_THREAD_MIN_WB_PAGES 1 /* * a threshold to determine diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index eb2e031ea887..cc82f142f811 100644 --- a/fs/f2fs/hash.c +++ b/fs/f2fs/hash.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/hash.c * @@ -7,10 +8,6 @@ * Portions of this code from linux/fs/ext3/hash.c * * Copyright (C) 2002 by Theodore Ts'o - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index a628c747e693..8caaab0685c7 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -1,11 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/inline.c * Copyright (c) 2013, Intel Corporation * Authors: Huajun Li * Haicheng Li - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 292f787a65e2..63e88eac5db3 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/inode.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 49cc29df9800..82c1f8824806 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/namei.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 1af0805915b4..7d11854028e8 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/node.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 0f4db7a61254..1c73d879a9bc 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/node.h * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ /* start node id of a node block dedicated to the given node id */ #define START_NID(nid) (((nid) / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index e34ca1686e3b..4e9bcdda38df 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/recovery.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 20a68557e4f5..7c61edd8c11f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/segment.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index b3d9e317ff0c..086150028c6d 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/segment.h * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c index 36cfd816c160..9e13db994fdf 100644 --- a/fs/f2fs/shrinker.c +++ b/fs/f2fs/shrinker.c @@ -1,13 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 /* * f2fs shrinker support * the basic infra was copied from fs/ubifs/shrinker.c * * Copyright (c) 2015 Motorola Mobility * Copyright (c) 2015 Jaegeuk Kim - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index cbc485b5967f..f0ef16eca5e9 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/super.c * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 30fd016afeb3..4c88b0720cdd 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -1,13 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 /* * f2fs sysfs interface * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ * Copyright (c) 2017 Chao Yu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c index a1fcd00bbb2b..ce2a5eb210b6 100644 --- a/fs/f2fs/trace.c +++ b/fs/f2fs/trace.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * f2fs IO tracer * * Copyright (c) 2014 Motorola Mobility * Copyright (c) 2014 Jaegeuk Kim - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/trace.h b/fs/f2fs/trace.h index 67db24ac1e85..e8075fc5b228 100644 --- a/fs/f2fs/trace.h +++ b/fs/f2fs/trace.h @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * f2fs IO tracer * * Copyright (c) 2014 Motorola Mobility * Copyright (c) 2014 Jaegeuk Kim - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef __F2FS_TRACE_H__ #define __F2FS_TRACE_H__ diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 152078bb4829..c7c3c2a63a85 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/xattr.c * @@ -13,10 +14,6 @@ * suggestion of Luka Renko . * xattr consolidation Copyright (c) 2004 James Morris , * Red Hat Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include #include diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 08a4840d6d7d..595d2af00a58 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * fs/f2fs/xattr.h * @@ -9,10 +10,6 @@ * On-disk format of extended attributes for the ext2 filesystem. * * (C) 2001 Andreas Gruenbacher, - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef __F2FS_XATTR_H__ #define __F2FS_XATTR_H__ diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 8e6a18582566..8b2b48bb53ff 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /** * include/linux/f2fs_fs.h * * Copyright (c) 2012 Samsung Electronics Co., Ltd. * http://www.samsung.com/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #ifndef _LINUX_F2FS_FS_H #define _LINUX_F2FS_FS_H From f1aab15eb34071ada2405437f8b4aa273c0d93fb Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 12 Sep 2018 09:22:29 +0800 Subject: [PATCH 029/189] f2fs: split IO error injection according to RW This patch adds to support injecting error for write IO, this can simulate IO error like fail_make_request or dm_flakey does. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 3 ++- fs/f2fs/data.c | 9 +++++++-- fs/f2fs/f2fs.h | 3 ++- fs/f2fs/super.c | 3 ++- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 0c8bdd38cefd..c32641476f5e 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -172,9 +172,10 @@ fault_type=%d Support configuring fault injection type, should be FAULT_DIR_DEPTH 0x000000100 FAULT_EVICT_INODE 0x000000200 FAULT_TRUNCATE 0x000000400 - FAULT_IO 0x000000800 + FAULT_READ_IO 0x000000800 FAULT_CHECKPOINT 0x000001000 FAULT_DISCARD 0x000002000 + FAULT_WRITE_IO 0x000004000 mode=%s Control block allocation mode which supports "adaptive" and "lfs". In "lfs" mode, there should be no random writes towards main area. diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index d95a8fa00e44..0d76ff25c437 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -122,8 +122,8 @@ static bool f2fs_bio_post_read_required(struct bio *bio) static void f2fs_read_end_io(struct bio *bio) { - if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) { - f2fs_show_injection_info(FAULT_IO); + if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_READ_IO)) { + f2fs_show_injection_info(FAULT_READ_IO); bio->bi_error = -EIO; } @@ -144,6 +144,11 @@ static void f2fs_write_end_io(struct bio *bio) struct bio_vec *bvec; int i; + if (time_to_inject(sbi, FAULT_WRITE_IO)) { + f2fs_show_injection_info(FAULT_WRITE_IO); + bio->bi_error = -EIO; + } + bio_for_each_segment_all(bvec, bio, i) { struct page *page = bvec->bv_page; enum count_type type = WB_DATA_TYPE(page); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c96766c267fb..5580f21e35ae 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -52,9 +52,10 @@ enum { FAULT_DIR_DEPTH, FAULT_EVICT_INODE, FAULT_TRUNCATE, - FAULT_IO, + FAULT_READ_IO, FAULT_CHECKPOINT, FAULT_DISCARD, + FAULT_WRITE_IO, FAULT_MAX, }; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f0ef16eca5e9..0bd53c8be22d 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -50,9 +50,10 @@ char *f2fs_fault_name[FAULT_MAX] = { [FAULT_DIR_DEPTH] = "too big dir depth", [FAULT_EVICT_INODE] = "evict_inode fail", [FAULT_TRUNCATE] = "truncate fail", - [FAULT_IO] = "IO error", + [FAULT_READ_IO] = "read IO error", [FAULT_CHECKPOINT] = "checkpoint error", [FAULT_DISCARD] = "discard error", + [FAULT_WRITE_IO] = "write IO error", }; void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate, From 47c4cfebd60f9e001bb50bb266a6899777a806c3 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Wed, 19 Sep 2018 14:18:47 +0530 Subject: [PATCH 030/189] f2fs: add new idle interval timing for discard and gc paths This helps to control the frequency of submission of discard and GC requests independently, based on the need. Suggested-by: Chao Yu Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 17 +++++++++++++- fs/f2fs/f2fs.h | 30 ++++++++++++++++++++++--- fs/f2fs/gc.c | 2 +- fs/f2fs/segment.c | 14 +++++------- fs/f2fs/super.c | 2 ++ fs/f2fs/sysfs.c | 5 +++++ 6 files changed, 56 insertions(+), 14 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 3bbb9fe9548c..6b5e31f470dc 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -121,7 +121,22 @@ What: /sys/fs/f2fs//idle_interval Date: January 2016 Contact: "Jaegeuk Kim" Description: - Controls the idle timing. + Controls the idle timing for all paths other than + discard and gc path. + +What: /sys/fs/f2fs//discard_idle_interval +Date: September 2018 +Contact: "Chao Yu" +Contact: "Sahitya Tummala" +Description: + Controls the idle timing for discard path. + +What: /sys/fs/f2fs//gc_idle_interval +Date: September 2018 +Contact: "Chao Yu" +Contact: "Sahitya Tummala" +Description: + Controls the idle timing for gc path. What: /sys/fs/f2fs//iostat_enable Date: August 2017 diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5580f21e35ae..9f4a1a22f0d7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1158,6 +1158,8 @@ enum { enum { CP_TIME, REQ_TIME, + DISCARD_TIME, + GC_TIME, MAX_TIME, }; @@ -1409,7 +1411,15 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type) { - sbi->last_time[type] = jiffies; + unsigned long now = jiffies; + + sbi->last_time[type] = now; + + /* DISCARD_TIME and GC_TIME are based on REQ_TIME */ + if (type == REQ_TIME) { + sbi->last_time[DISCARD_TIME] = now; + sbi->last_time[GC_TIME] = now; + } } static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type) @@ -1419,7 +1429,21 @@ static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type) return time_after(jiffies, sbi->last_time[type] + interval); } -static inline bool is_idle(struct f2fs_sb_info *sbi) +static inline unsigned int f2fs_time_to_wait(struct f2fs_sb_info *sbi, + int type) +{ + unsigned long interval = sbi->interval_time[type] * HZ; + unsigned int wait_ms = 0; + long delta; + + delta = (sbi->last_time[type] + interval) - jiffies; + if (delta > 0) + wait_ms = jiffies_to_msecs(delta); + + return wait_ms; +} + +static inline bool is_idle(struct f2fs_sb_info *sbi, int type) { struct block_device *bdev = sbi->sb->s_bdev; struct request_queue *q = bdev_get_queue(bdev); @@ -1428,7 +1452,7 @@ static inline bool is_idle(struct f2fs_sb_info *sbi) if (rl->count[BLK_RW_SYNC] || rl->count[BLK_RW_ASYNC]) return false; - return f2fs_time_over(sbi, REQ_TIME); + return f2fs_time_over(sbi, type); } /* diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 7e3f323254ef..5c80146a578e 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -80,7 +80,7 @@ static int gc_thread_func(void *data) if (!mutex_trylock(&sbi->gc_mutex)) goto next; - if (!is_idle(sbi)) { + if (!is_idle(sbi, GC_TIME)) { increase_sleep_time(gc_th, &wait_ms); mutex_unlock(&sbi->gc_mutex); goto next; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 7c61edd8c11f..384185e3a2f4 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -508,7 +508,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) else f2fs_build_free_nids(sbi, false, false); - if (!is_idle(sbi) && + if (!is_idle(sbi, REQ_TIME) && (!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi))) return; @@ -1388,7 +1388,7 @@ static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi, if (dc->state != D_PREP) goto next; - if (dpolicy->io_aware && !is_idle(sbi)) { + if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) { io_interrupted = true; break; } @@ -1448,7 +1448,7 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, dc->state != D_PREP); if (dpolicy->io_aware && i < dpolicy->io_aware_gran && - !is_idle(sbi)) { + !is_idle(sbi, DISCARD_TIME)) { io_interrupted = true; break; } @@ -1641,8 +1641,6 @@ static int issue_discard_thread(void *data) struct discard_policy dpolicy; unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME; int issued; - unsigned long interval = sbi->interval_time[REQ_TIME] * HZ; - long delta; set_freezable(); @@ -1679,10 +1677,8 @@ static int issue_discard_thread(void *data) __wait_all_discard_cmd(sbi, &dpolicy); wait_ms = dpolicy.min_interval; } else if (issued == -1){ - delta = (sbi->last_time[REQ_TIME] + interval) - jiffies; - if (delta > 0) - wait_ms = jiffies_to_msecs(delta); - else + wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME); + if (!wait_ms) wait_ms = dpolicy.mid_interval; } else { wait_ms = dpolicy.max_interval; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 0bd53c8be22d..5ddfa0d0b386 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2465,6 +2465,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->dir_level = DEF_DIR_LEVEL; sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL; sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL; + sbi->interval_time[DISCARD_TIME] = DEF_IDLE_INTERVAL; + sbi->interval_time[GC_TIME] = DEF_IDLE_INTERVAL; clear_sbi_flag(sbi, SBI_NEED_FSCK); for (i = 0; i < NR_COUNT_TYPE; i++) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 4c88b0720cdd..5a252d78d178 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -404,6 +404,9 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, discard_idle_interval, + interval_time[DISCARD_TIME]); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle_interval, interval_time[GC_TIME]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold); @@ -457,6 +460,8 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(dirty_nats_ratio), ATTR_LIST(cp_interval), ATTR_LIST(idle_interval), + ATTR_LIST(discard_idle_interval), + ATTR_LIST(gc_idle_interval), ATTR_LIST(iostat_enable), ATTR_LIST(readdir_ra), ATTR_LIST(gc_pin_file_thresh), From fa0b9799279721b5e8db54ef334fb6dda62563ce Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 19 Sep 2018 15:45:19 -0700 Subject: [PATCH 031/189] f2fs: avoid infinite loop in f2fs_alloc_nid If we have an error in f2fs_build_free_nids, we're able to fall into a loop to find free nids. Suggested-by: Chao Yu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 7d11854028e8..4dc98f5f2e84 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2353,8 +2353,9 @@ retry: spin_unlock(&nm_i->nid_list_lock); /* Let's scan nat pages and its caches to get free nids */ - f2fs_build_free_nids(sbi, true, false); - goto retry; + if (!f2fs_build_free_nids(sbi, true, false)) + goto retry; + return false; } /* From 6502c983b93d6b033529259ca3e29896c26863f0 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 20 Sep 2018 17:41:30 +0800 Subject: [PATCH 032/189] f2fs: fix to recover inode's uid/gid during POR Step to reproduce this bug: 1. logon as root 2. mount -t f2fs /dev/sdd /mnt; 3. touch /mnt/file; 4. chown system /mnt/file; chgrp system /mnt/file; 5. xfs_io -f /mnt/file -c "fsync"; 6. godown /mnt; 7. umount /mnt; 8. mount -t f2fs /dev/sdd /mnt; After step 8) we will expect file's uid/gid are all system, but during recovery, these two fields were not been recovered, fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 4e9bcdda38df..a990a49a0723 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -209,6 +209,8 @@ static void recover_inode(struct inode *inode, struct page *page) char *name; inode->i_mode = le16_to_cpu(raw->i_mode); + i_uid_write(inode, le32_to_cpu(raw->i_uid)); + i_gid_write(inode, le32_to_cpu(raw->i_gid)); f2fs_i_size_write(inode, le64_to_cpu(raw->i_size)); inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime); inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime); From bf3a5ea5c868f9a5fa6f313aba46e98c52b54895 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Sat, 22 Sep 2018 22:43:09 +0800 Subject: [PATCH 033/189] f2fs: fix remount problem of option io_bits Currently we show mount option "io_bits=%u" as "io_size=%uKB", it will cause option parsing problem(unrecognized mount option) in remount. Signed-off-by: Chengguang Xu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 5ddfa0d0b386..24c390dbed1a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1331,7 +1331,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) from_kgid_munged(&init_user_ns, F2FS_OPTION(sbi).s_resgid)); if (F2FS_IO_SIZE_BITS(sbi)) - seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi)); + seq_printf(seq, ",io_bits=%u", + F2FS_OPTION(sbi).write_io_size_bits); #ifdef CONFIG_F2FS_FAULT_INJECTION if (test_opt(sbi, FAULT_INJECTION)) { seq_printf(seq, ",fault_injection=%u", From 80b258f2a6c58bdf444848daaf009c6ddcc983b6 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 17 Sep 2018 13:25:04 -0700 Subject: [PATCH 034/189] f2fs: report ENOENT correctly in f2fs_rename This fixes wrong error report in f2fs_rename. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 82c1f8824806..f85241dc8140 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -821,7 +821,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, struct f2fs_dir_entry *old_entry; struct f2fs_dir_entry *new_entry; bool is_old_inline = f2fs_has_inline_dentry(old_dir); - int err = -ENOENT; + int err; if (unlikely(f2fs_cp_error(sbi))) return -EIO; @@ -845,6 +845,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out; } + err = -ENOENT; old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_entry) { if (IS_ERR(old_page)) @@ -1010,7 +1011,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, struct f2fs_dir_entry *old_dir_entry = NULL, *new_dir_entry = NULL; struct f2fs_dir_entry *old_entry, *new_entry; int old_nlink = 0, new_nlink = 0; - int err = -ENOENT; + int err; if (unlikely(f2fs_cp_error(sbi))) return -EIO; @@ -1031,6 +1032,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, if (err) goto out; + err = -ENOENT; old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_entry) { if (IS_ERR(old_page)) From 202de66f20d1b349d0360e89272aa81346785a27 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 19 Sep 2018 15:28:40 -0700 Subject: [PATCH 035/189] f2fs: update i_size after DIO completion This is related to ee70daaba82d ("xfs: update i_size after unwritten conversion in dio completion") If we update i_size during dio_write, dio_read can read out stale data, which breaks xfstests/465. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 15 +++++++-------- fs/f2fs/f2fs.h | 1 + 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0d76ff25c437..739417e335ae 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -878,7 +878,6 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) struct f2fs_summary sum; struct node_info ni; block_t old_blkaddr; - pgoff_t fofs; blkcnt_t count = 1; int err; @@ -907,12 +906,10 @@ alloc: old_blkaddr, old_blkaddr); f2fs_set_data_blkaddr(dn); - /* update i_size */ - fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + - dn->ofs_in_node; - if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_SHIFT)) - f2fs_i_size_write(dn->inode, - ((loff_t)(fofs + 1) << PAGE_SHIFT)); + /* + * i_size will be updated by direct_IO. Otherwise, we'll get stale + * data from unwritten block via dio_read. + */ return 0; } @@ -1078,6 +1075,8 @@ next_block: last_ofs_in_node = dn.ofs_in_node; } } else { + WARN_ON(flag != F2FS_GET_BLOCK_PRE_DIO && + flag != F2FS_GET_BLOCK_DIO); err = __allocate_data_block(&dn, map->m_seg_type); if (!err) @@ -1257,7 +1256,7 @@ static int get_data_block_dio(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { return __get_data_block(inode, iblock, bh_result, create, - F2FS_GET_BLOCK_DEFAULT, NULL, + F2FS_GET_BLOCK_DIO, NULL, f2fs_rw_hint_to_seg_type( inode->i_write_hint)); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9f4a1a22f0d7..5850b3f1300f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -664,6 +664,7 @@ enum { F2FS_GET_BLOCK_DEFAULT, F2FS_GET_BLOCK_FIEMAP, F2FS_GET_BLOCK_BMAP, + F2FS_GET_BLOCK_DIO, F2FS_GET_BLOCK_PRE_DIO, F2FS_GET_BLOCK_PRE_AIO, F2FS_GET_BLOCK_PRECACHE, From 01ca8a368ff7bd8a74eb2b6c229a7ac2ed42372b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Sep 2018 15:35:58 +0800 Subject: [PATCH 036/189] f2fs: fix to recover inode's project id during POR Testcase to reproduce this bug: 1. mkfs.f2fs -O extra_attr -O project_quota /dev/sdd 2. mount -t f2fs /dev/sdd /mnt/f2fs 3. touch /mnt/f2fs/file 4. sync 5. chattr -p 1 /mnt/f2fs/file 6. xfs_io -f /mnt/f2fs/file -c "fsync" 7. godown /mnt/f2fs 8. umount /mnt/f2fs 9. mount -t f2fs /dev/sdd /mnt/f2fs 10. lsattr -p /mnt/f2fs/file 0 -----------------N- /mnt/f2fs/file But actually, we expect the correct result is: 1 -----------------N- /mnt/f2fs/file The reason is we didn't recover inode.i_projid field during mount, fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index a990a49a0723..7bb254956110 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -211,6 +211,19 @@ static void recover_inode(struct inode *inode, struct page *page) inode->i_mode = le16_to_cpu(raw->i_mode); i_uid_write(inode, le32_to_cpu(raw->i_uid)); i_gid_write(inode, le32_to_cpu(raw->i_gid)); + + if (raw->i_inline & F2FS_EXTRA_ATTR) { + if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)->sb) && + F2FS_FITS_IN_INODE(raw, le16_to_cpu(raw->i_extra_isize), + i_projid)) { + projid_t i_projid; + + i_projid = (projid_t)le32_to_cpu(raw->i_projid); + F2FS_I(inode)->i_projid = + make_kprojid(&init_user_ns, i_projid); + } + } + f2fs_i_size_write(inode, le64_to_cpu(raw->i_size)); inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime); inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime); From a19aff4b0517357a26d0f849a6455c0a946b600c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Sep 2018 15:35:59 +0800 Subject: [PATCH 037/189] f2fs: fix to recover inode's i_flags during POR Testcase to reproduce this bug: 1. mkfs.f2fs /dev/sdd 2. mount -t f2fs /dev/sdd /mnt/f2fs 3. touch /mnt/f2fs/file 4. sync 5. chattr +A /mnt/f2fs/file 6. xfs_io -f /mnt/f2fs/file -c "fsync" 7. godown /mnt/f2fs 8. umount /mnt/f2fs 9. mount -t f2fs /dev/sdd /mnt/f2fs 10. lsattr /mnt/f2fs/file -----------------N- /mnt/f2fs/file But actually, we expect the corrct result is: -------A---------N- /mnt/f2fs/file The reason is we didn't recover inode.i_flags field during mount, fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 7bb254956110..c0d110d26656 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -233,6 +233,7 @@ static void recover_inode(struct inode *inode, struct page *page) inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec); F2FS_I(inode)->i_advise = raw->i_advise; + F2FS_I(inode)->i_flags = le32_to_cpu(raw->i_flags); recover_inline_flags(inode, raw); From 9dd0bfb443446971e63c2f9a1cb4bc9d51081f7d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Sep 2018 15:36:00 +0800 Subject: [PATCH 038/189] f2fs: fix to recover inode's i_gc_failures during POR inode.i_gc_failures is used to indicate that skip count of migrating on blocks of inode, we should guarantee it can be recovered in sudden power-off case. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index c0d110d26656..14c156a256ca 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -234,6 +234,8 @@ static void recover_inode(struct inode *inode, struct page *page) F2FS_I(inode)->i_advise = raw->i_advise; F2FS_I(inode)->i_flags = le32_to_cpu(raw->i_flags); + F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN] = + le16_to_cpu(raw->i_gc_failures); recover_inline_flags(inode, raw); From 2238c3783157606539f7872e6499b847324835fc Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Sep 2018 15:36:01 +0800 Subject: [PATCH 039/189] f2fs: fix to recover inode's crtime during POR Testcase to reproduce this bug: 1. mkfs.f2fs -O extra_attr -O inode_crtime /dev/sdd 2. mount -t f2fs /dev/sdd /mnt/f2fs 3. touch /mnt/f2fs/file 4. xfs_io -f /mnt/f2fs/file -c "fsync" 5. godown /mnt/f2fs 6. umount /mnt/f2fs 7. mount -t f2fs /dev/sdd /mnt/f2fs 8. xfs_io -f /mnt/f2fs/file -c "statx -r" stat.btime.tv_sec = 0 stat.btime.tv_nsec = 0 This patch fixes to recover inode creation time fields during mount. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 4dc98f5f2e84..0706116c6ea4 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2561,6 +2561,13 @@ retry: F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), i_projid)) dst->i_projid = src->i_projid; + + if (f2fs_sb_has_inode_crtime(sbi->sb) && + F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), + i_crtime_nsec)) { + dst->i_crtime = src->i_crtime; + dst->i_crtime_nsec = src->i_crtime_nsec; + } } new_ni = old_ni; From 4b4f4610ed661ec3666282acc2eced3a2807756d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Sep 2018 15:36:03 +0800 Subject: [PATCH 040/189] f2fs: mark inode dirty explicitly in recover_inode() Mark inode dirty explicitly in the end of recover_inode() to make sure that all recoverable fields can be persisted later. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 14c156a256ca..6682c8d86a51 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -239,6 +239,8 @@ static void recover_inode(struct inode *inode, struct page *page) recover_inline_flags(inode, raw); + f2fs_mark_inode_dirty_sync(inode, true); + if (file_enc_name(inode)) name = ""; else From 614fe66d2715fbb6e3f73e12d89816cf2e7bc3a9 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 17 Sep 2018 17:36:06 -0700 Subject: [PATCH 041/189] f2fs: avoid f2fs_bug_on if f2fs_get_meta_page_nofail got EIO This patch avoids BUG_ON when f2fs_get_meta_page_nofail got EIO during xfstests/generic/475. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 10 +++++----- fs/f2fs/f2fs.h | 2 +- fs/f2fs/gc.c | 12 ++++++++++++ fs/f2fs/node.c | 32 ++++++++++++++++++++++++-------- fs/f2fs/recovery.c | 2 ++ fs/f2fs/segment.c | 3 +++ 6 files changed, 47 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 5a16a4ddbc01..6c24f543e2d8 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -119,11 +119,8 @@ retry: if (PTR_ERR(page) == -EIO && ++count <= DEFAULT_RETRY_IO_COUNT) goto retry; - f2fs_stop_checkpoint(sbi, false); - f2fs_bug_on(sbi, 1); } - return page; } @@ -1497,7 +1494,10 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver); /* write cached NAT/SIT entries to NAT/SIT area */ - f2fs_flush_nat_entries(sbi, cpc); + err = f2fs_flush_nat_entries(sbi, cpc); + if (err) + goto stop; + f2fs_flush_sit_entries(sbi, cpc); /* unlock all the fs_lock[] in do_checkpoint() */ @@ -1506,7 +1506,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_release_discard_addrs(sbi); else f2fs_clear_prefree_segments(sbi, cpc); - +stop: unblock_operations(sbi); stat_inc_cp_count(sbi->stat_info); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5850b3f1300f..c356629f5089 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2996,7 +2996,7 @@ int f2fs_recover_xattr_data(struct inode *inode, struct page *page); int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page); int f2fs_restore_node_summary(struct f2fs_sb_info *sbi, unsigned int segno, struct f2fs_summary_block *sum); -void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc); +int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc); int f2fs_build_node_manager(struct f2fs_sb_info *sbi); void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi); int __init f2fs_create_node_manager_caches(void); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 5c80146a578e..7e8bde2dd279 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1066,6 +1066,18 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, /* reference all summary page */ while (segno < end_segno) { sum_page = f2fs_get_sum_page(sbi, segno++); + if (IS_ERR(sum_page)) { + int err = PTR_ERR(sum_page); + + end_segno = segno - 1; + for (segno = start_segno; segno < end_segno; segno++) { + sum_page = find_get_page(META_MAPPING(sbi), + GET_SUM_BLOCK(sbi, segno)); + f2fs_put_page(sum_page, 0); + f2fs_put_page(sum_page, 0); + } + return err; + } unlock_page(sum_page); } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 0706116c6ea4..9a03c2844c93 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -126,6 +126,8 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) /* get current nat block page with lock */ src_page = get_current_nat_page(sbi, nid); + if (IS_ERR(src_page)) + return src_page; dst_page = f2fs_grab_meta_page(sbi, dst_off); f2fs_bug_on(sbi, PageDirty(src_page)); @@ -2268,15 +2270,19 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, nm_i->nat_block_bitmap)) { struct page *page = get_current_nat_page(sbi, nid); - ret = scan_nat_page(sbi, page, nid); - f2fs_put_page(page, 1); + if (IS_ERR(page)) { + ret = PTR_ERR(page); + } else { + ret = scan_nat_page(sbi, page, nid); + f2fs_put_page(page, 1); + } if (ret) { up_read(&nm_i->nat_tree_lock); f2fs_bug_on(sbi, !mount); f2fs_msg(sbi->sb, KERN_ERR, "NAT is corrupt, run fsck to fix it"); - return -EINVAL; + return ret; } } @@ -2711,7 +2717,7 @@ static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, __clear_bit_le(nat_index, nm_i->full_nat_bits); } -static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, +static int __flush_nat_entry_set(struct f2fs_sb_info *sbi, struct nat_entry_set *set, struct cp_control *cpc) { struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); @@ -2735,6 +2741,9 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, down_write(&curseg->journal_rwsem); } else { page = get_next_nat_page(sbi, start_nid); + if (IS_ERR(page)) + return PTR_ERR(page); + nat_blk = page_address(page); f2fs_bug_on(sbi, !nat_blk); } @@ -2780,12 +2789,13 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); kmem_cache_free(nat_entry_set_slab, set); } + return 0; } /* * This function is called during the checkpointing process. */ -void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) +int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); @@ -2795,6 +2805,7 @@ void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) unsigned int found; nid_t set_idx = 0; LIST_HEAD(sets); + int err = 0; /* during unmount, let's flush nat_bits before checking dirty_nat_cnt */ if (enabled_nat_bits(sbi, cpc)) { @@ -2804,7 +2815,7 @@ void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) } if (!nm_i->dirty_nat_cnt) - return; + return 0; down_write(&nm_i->nat_tree_lock); @@ -2827,11 +2838,16 @@ void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) } /* flush dirty nats in nat entry set */ - list_for_each_entry_safe(set, tmp, &sets, set_list) - __flush_nat_entry_set(sbi, set, cpc); + list_for_each_entry_safe(set, tmp, &sets, set_list) { + err = __flush_nat_entry_set(sbi, set, cpc); + if (err) + break; + } up_write(&nm_i->nat_tree_lock); /* Allow dirty nats by node block allocation in write_begin */ + + return err; } static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 6682c8d86a51..b069b7d7403d 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -375,6 +375,8 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, } sum_page = f2fs_get_sum_page(sbi, segno); + if (IS_ERR(sum_page)) + return PTR_ERR(sum_page); sum_node = (struct f2fs_summary_block *)page_address(sum_page); sum = sum_node->entries[blkoff]; f2fs_put_page(sum_page, 1); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 384185e3a2f4..2e9c6dd183b6 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2509,6 +2509,7 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type) __next_free_blkoff(sbi, curseg, 0); sum_page = f2fs_get_sum_page(sbi, new_segno); + f2fs_bug_on(sbi, IS_ERR(sum_page)); sum_node = (struct f2fs_summary_block *)page_address(sum_page); memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE); f2fs_put_page(sum_page, 1); @@ -3984,6 +3985,8 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) se = &sit_i->sentries[start]; page = get_current_sit_page(sbi, start); + if (IS_ERR(page)) + return PTR_ERR(page); sit_blk = (struct f2fs_sit_block *)page_address(page); sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; f2fs_put_page(page, 1); From a3b89f58fc863aab72afee892ab24fc3a2fd550b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 25 Sep 2018 15:25:21 -0700 Subject: [PATCH 042/189] f2fs: return correct errno in f2fs_gc This fixes overriding error number in f2fs_gc. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 7e8bde2dd279..ff2708bde48d 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1253,7 +1253,7 @@ stop: put_gc_inode(&gc_list); - if (sync) + if (sync && !ret) ret = sec_freed ? 0 : -EAGAIN; return ret; } From 129674a584f4bfcf3e69d4fad6614dfff1906f55 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 27 Sep 2018 22:15:31 -0700 Subject: [PATCH 043/189] f2fs: fix missing up_read This patch fixes missing up_read call. Fixes: c9b60788fc76 ("f2fs: fix to do sanity check with block address in main area") Cc: # 4.19+ Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 9a03c2844c93..bde71a04cf95 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1541,8 +1541,10 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, } if (__is_valid_data_blkaddr(ni.blk_addr) && - !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC)) + !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC)) { + up_read(&sbi->node_write); goto redirty_out; + } if (atomic && !test_opt(sbi, NOBARRIER)) fio.op_flags |= WRITE_FLUSH_FUA; From 092a13da9c4edd8c0782004febb3af1d40a80800 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 28 Sep 2018 00:24:39 -0700 Subject: [PATCH 044/189] f2fs: keep lazytime on remount This patch fixes losing lazytime when remounting f2fs. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 24c390dbed1a..f30a6b67b7b4 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1551,6 +1551,7 @@ skip: (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); limit_reserve_root(sbi); + *flags = (*flags & ~MS_LAZYTIME) | (sb->s_flags & MS_LAZYTIME); return 0; restore_gc: if (need_restart_gc) { From fb32c954b6a90f029382e4f35434fb071fdf682f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 29 Sep 2018 18:31:27 +0800 Subject: [PATCH 045/189] f2fs: add to account meta IO This patch supports to account meta IO, it enables to show write IO from f2fs more comprehensively via 'status' debugfs entry. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 13 +++++++++++++ fs/f2fs/f2fs.h | 15 +++++++++++++++ fs/f2fs/segment.c | 1 + 3 files changed, 29 insertions(+) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index d3c402183e3c..da1cabbc4973 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -118,6 +118,9 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->curzone[i] = GET_ZONE_FROM_SEC(sbi, si->cursec[i]); } + for (i = META_CP; i < META_MAX; i++) + si->meta_count[i] = atomic_read(&sbi->meta_count[i]); + for (i = 0; i < 2; i++) { si->segment_count[i] = sbi->segment_count[i]; si->block_count[i] = sbi->block_count[i]; @@ -329,6 +332,13 @@ static int stat_show(struct seq_file *s, void *v) si->prefree_count, si->free_segs, si->free_secs); seq_printf(s, "CP calls: %d (BG: %d)\n", si->cp_count, si->bg_cp_count); + seq_printf(s, " - cp blocks : %u\n", si->meta_count[META_CP]); + seq_printf(s, " - sit blocks : %u\n", + si->meta_count[META_SIT]); + seq_printf(s, " - nat blocks : %u\n", + si->meta_count[META_NAT]); + seq_printf(s, " - ssa blocks : %u\n", + si->meta_count[META_SSA]); seq_printf(s, "GC calls: %d (BG: %d)\n", si->call_count, si->bg_gc); seq_printf(s, " - data segments : %d (%d)\n", @@ -441,6 +451,7 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct f2fs_stat_info *si; + int i; si = f2fs_kzalloc(sbi, sizeof(struct f2fs_stat_info), GFP_KERNEL); if (!si) @@ -466,6 +477,8 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) atomic_set(&sbi->inline_inode, 0); atomic_set(&sbi->inline_dir, 0); atomic_set(&sbi->inplace_count, 0); + for (i = META_CP; i < META_MAX; i++) + atomic_set(&sbi->meta_count[i], 0); atomic_set(&sbi->aw_cnt, 0); atomic_set(&sbi->vw_cnt, 0); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c356629f5089..ba94df0c117a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -269,6 +269,7 @@ enum { META_NAT, META_SIT, META_SSA, + META_MAX, META_POR, DATA_GENERIC, META_GENERIC, @@ -1325,6 +1326,7 @@ struct f2fs_sb_info { */ #ifdef CONFIG_F2FS_STAT_FS struct f2fs_stat_info *stat_info; /* FS status information */ + atomic_t meta_count[META_MAX]; /* # of meta blocks */ unsigned int segment_count[2]; /* # of allocated segments */ unsigned int block_count[2]; /* # of allocated blocks */ atomic_t inplace_count; /* # of inplace update */ @@ -3208,6 +3210,7 @@ struct f2fs_stat_info { int cursec[NR_CURSEG_TYPE]; int curzone[NR_CURSEG_TYPE]; + unsigned int meta_count[META_MAX]; unsigned int segment_count[2]; unsigned int block_count[2]; unsigned int inplace_count; @@ -3259,6 +3262,17 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) if (f2fs_has_inline_dentry(inode)) \ (atomic_dec(&F2FS_I_SB(inode)->inline_dir)); \ } while (0) +#define stat_inc_meta_count(sbi, blkaddr) \ + do { \ + if (blkaddr < SIT_I(sbi)->sit_base_addr) \ + atomic_inc(&(sbi)->meta_count[META_CP]); \ + else if (blkaddr < NM_I(sbi)->nat_blkaddr) \ + atomic_inc(&(sbi)->meta_count[META_SIT]); \ + else if (blkaddr < SM_I(sbi)->ssa_blkaddr) \ + atomic_inc(&(sbi)->meta_count[META_NAT]); \ + else if (blkaddr < SM_I(sbi)->main_blkaddr) \ + atomic_inc(&(sbi)->meta_count[META_SSA]); \ + } while (0) #define stat_inc_seg_type(sbi, curseg) \ ((sbi)->segment_count[(curseg)->alloc_type]++) #define stat_inc_block_count(sbi, curseg) \ @@ -3346,6 +3360,7 @@ void f2fs_destroy_root_stats(void); #define stat_inc_volatile_write(inode) do { } while (0) #define stat_dec_volatile_write(inode) do { } while (0) #define stat_update_max_volatile_write(inode) do { } while (0) +#define stat_inc_meta_count(sbi, blkaddr) do { } while (0) #define stat_inc_seg_type(sbi, curseg) do { } while (0) #define stat_inc_block_count(sbi, curseg) do { } while (0) #define stat_inc_inplace_blocks(sbi) do { } while (0) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 2e9c6dd183b6..841ba43fc02b 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3099,6 +3099,7 @@ void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page, ClearPageError(page); f2fs_submit_page_write(&fio); + stat_inc_meta_count(sbi, page->index); f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE); } From c95f10ed9fe0adbb785b2572d47bd3631c7584a5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 29 Sep 2018 18:31:28 +0800 Subject: [PATCH 046/189] f2fs: add to account skip count of background GC This patch adds to account skip count of background GC, and show stat info via 'status' debugfs entry. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 4 ++++ fs/f2fs/f2fs.h | 7 +++++++ fs/f2fs/gc.c | 14 +++++++++++--- 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index da1cabbc4973..75bc62edc4c1 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -101,6 +101,8 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->avail_nids = NM_I(sbi)->available_nids; si->alloc_nids = NM_I(sbi)->nid_cnt[PREALLOC_NID]; si->bg_gc = sbi->bg_gc; + si->io_skip_bggc = sbi->io_skip_bggc; + si->other_skip_bggc = sbi->other_skip_bggc; si->skipped_atomic_files[BG_GC] = sbi->skipped_atomic_files[BG_GC]; si->skipped_atomic_files[FG_GC] = sbi->skipped_atomic_files[FG_GC]; si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg) @@ -355,6 +357,8 @@ static int stat_show(struct seq_file *s, void *v) si->skipped_atomic_files[BG_GC] + si->skipped_atomic_files[FG_GC], si->skipped_atomic_files[BG_GC]); + seq_printf(s, "BG skip : IO: %u, Other: %u\n", + si->io_skip_bggc, si->other_skip_bggc); seq_puts(s, "\nExtent Cache:\n"); seq_printf(s, " - Hit Count: L1-1:%llu L1-2:%llu L2:%llu\n", si->hit_largest, si->hit_cached, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ba94df0c117a..0df8903dfa92 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1342,6 +1342,8 @@ struct f2fs_sb_info { atomic_t max_aw_cnt; /* max # of atomic writes */ atomic_t max_vw_cnt; /* max # of volatile writes */ int bg_gc; /* background gc calls */ + unsigned int io_skip_bggc; /* skip background gc for in-flight IO */ + unsigned int other_skip_bggc; /* skip background gc for other reasons */ unsigned int ndirty_inode[NR_INODE_TYPE]; /* # of dirty inodes */ #endif spinlock_t stat_lock; /* lock for stat operations */ @@ -3189,6 +3191,7 @@ struct f2fs_stat_info { int free_nids, avail_nids, alloc_nids; int total_count, utilization; int bg_gc, nr_wb_cp_data, nr_wb_data; + unsigned int io_skip_bggc, other_skip_bggc; int nr_flushing, nr_flushed, flush_list_empty; int nr_discarding, nr_discarded; int nr_discard_cmd; @@ -3226,6 +3229,8 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) #define stat_inc_bg_cp_count(si) ((si)->bg_cp_count++) #define stat_inc_call_count(si) ((si)->call_count++) #define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++) +#define stat_io_skip_bggc_count(sbi) ((sbi)->io_skip_bggc++) +#define stat_other_skip_bggc_count(sbi) ((sbi)->other_skip_bggc++) #define stat_inc_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]++) #define stat_dec_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]--) #define stat_inc_total_hit(sbi) (atomic64_inc(&(sbi)->total_hit_ext)) @@ -3342,6 +3347,8 @@ void f2fs_destroy_root_stats(void); #define stat_inc_bg_cp_count(si) do { } while (0) #define stat_inc_call_count(si) do { } while (0) #define stat_inc_bggc_count(si) do { } while (0) +#define stat_io_skip_bggc_count(sbi) do { } while (0) +#define stat_other_skip_bggc_count(sbi) do { } while (0) #define stat_inc_dirty_inode(sbi, type) do { } while (0) #define stat_dec_dirty_inode(sbi, type) do { } while (0) #define stat_inc_total_hit(sb) do { } while (0) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index ff2708bde48d..4db16f7c6858 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -40,13 +40,16 @@ static int gc_thread_func(void *data) if (gc_th->gc_wake) gc_th->gc_wake = 0; - if (try_to_freeze()) + if (try_to_freeze()) { + stat_other_skip_bggc_count(sbi); continue; + } if (kthread_should_stop()) break; if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) { increase_sleep_time(gc_th, &wait_ms); + stat_other_skip_bggc_count(sbi); continue; } @@ -55,8 +58,10 @@ static int gc_thread_func(void *data) f2fs_stop_checkpoint(sbi, false); } - if (!sb_start_write_trylock(sbi->sb)) + if (!sb_start_write_trylock(sbi->sb)) { + stat_other_skip_bggc_count(sbi); continue; + } /* * [GC triggering condition] @@ -77,12 +82,15 @@ static int gc_thread_func(void *data) goto do_gc; } - if (!mutex_trylock(&sbi->gc_mutex)) + if (!mutex_trylock(&sbi->gc_mutex)) { + stat_other_skip_bggc_count(sbi); goto next; + } if (!is_idle(sbi, GC_TIME)) { increase_sleep_time(gc_th, &wait_ms); mutex_unlock(&sbi->gc_mutex); + stat_io_skip_bggc_count(sbi); goto next; } From f6a7a0346c35a6477f7b09b3a2f074e93cdafd7d Mon Sep 17 00:00:00 2001 From: Junling Zheng Date: Fri, 28 Sep 2018 20:25:56 +0800 Subject: [PATCH 047/189] f2fs: support superblock checksum Now we support crc32 checksum for superblock. Reviewed-by: Chao Yu Signed-off-by: Junling Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 ++ fs/f2fs/super.c | 28 ++++++++++++++++++++++++++++ fs/f2fs/sysfs.c | 7 +++++++ include/linux/f2fs_fs.h | 3 ++- 4 files changed, 39 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0df8903dfa92..4810e05d3173 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -150,6 +150,7 @@ struct f2fs_mount_info { #define F2FS_FEATURE_INODE_CRTIME 0x0100 #define F2FS_FEATURE_LOST_FOUND 0x0200 #define F2FS_FEATURE_VERITY 0x0400 /* reserved */ +#define F2FS_FEATURE_SB_CHKSUM 0x0800 #define F2FS_HAS_FEATURE(sb, mask) \ ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0) @@ -3516,6 +3517,7 @@ F2FS_FEATURE_FUNCS(flexible_inline_xattr, FLEXIBLE_INLINE_XATTR); F2FS_FEATURE_FUNCS(quota_ino, QUOTA_INO); F2FS_FEATURE_FUNCS(inode_crtime, INODE_CRTIME); F2FS_FEATURE_FUNCS(lost_found, LOST_FOUND); +F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM); #ifdef CONFIG_BLK_DEV_ZONED static inline int get_blkz_type(struct f2fs_sb_info *sbi, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f30a6b67b7b4..cca2ccbf18fe 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2182,6 +2182,26 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, (bh->b_data + F2FS_SUPER_OFFSET); struct super_block *sb = sbi->sb; unsigned int blocksize; + size_t crc_offset = 0; + __u32 crc = 0; + + /* Check checksum_offset and crc in superblock */ + if (le32_to_cpu(raw_super->feature) & F2FS_FEATURE_SB_CHKSUM) { + crc_offset = le32_to_cpu(raw_super->checksum_offset); + if (crc_offset != + offsetof(struct f2fs_super_block, crc)) { + f2fs_msg(sb, KERN_INFO, + "Invalid SB checksum offset: %zu", + crc_offset); + return 1; + } + crc = le32_to_cpu(raw_super->crc); + if (!f2fs_crc_valid(sbi, crc, raw_super, crc_offset)) { + f2fs_msg(sb, KERN_INFO, + "Invalid SB checksum value: %u", crc); + return 1; + } + } if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) { f2fs_msg(sb, KERN_INFO, @@ -2639,6 +2659,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) { struct buffer_head *bh; + __u32 crc = 0; int err; if ((recover && f2fs_readonly(sbi->sb)) || @@ -2647,6 +2668,13 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) return -EROFS; } + /* we should update superblock crc here */ + if (!recover && f2fs_sb_has_sb_chksum(sbi->sb)) { + crc = f2fs_crc32(sbi, F2FS_RAW_SUPER(sbi), + offsetof(struct f2fs_super_block, crc)); + F2FS_RAW_SUPER(sbi)->crc = cpu_to_le32(crc); + } + /* write back-up superblock first */ bh = sb_bread(sbi->sb, sbi->valid_super_block ? 0 : 1); if (!bh) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 5a252d78d178..db89741b219b 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -117,6 +117,9 @@ static ssize_t features_show(struct f2fs_attr *a, if (f2fs_sb_has_lost_found(sb)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "lost_found"); + if (f2fs_sb_has_sb_chksum(sb)) + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "sb_checksum"); len += snprintf(buf + len, PAGE_SIZE - len, "\n"); return len; } @@ -334,6 +337,7 @@ enum feat_id { FEAT_QUOTA_INO, FEAT_INODE_CRTIME, FEAT_LOST_FOUND, + FEAT_SB_CHECKSUM, }; static ssize_t f2fs_feature_show(struct f2fs_attr *a, @@ -350,6 +354,7 @@ static ssize_t f2fs_feature_show(struct f2fs_attr *a, case FEAT_QUOTA_INO: case FEAT_INODE_CRTIME: case FEAT_LOST_FOUND: + case FEAT_SB_CHECKSUM: return snprintf(buf, PAGE_SIZE, "supported\n"); } return 0; @@ -434,6 +439,7 @@ F2FS_FEATURE_RO_ATTR(flexible_inline_xattr, FEAT_FLEXIBLE_INLINE_XATTR); F2FS_FEATURE_RO_ATTR(quota_ino, FEAT_QUOTA_INO); F2FS_FEATURE_RO_ATTR(inode_crtime, FEAT_INODE_CRTIME); F2FS_FEATURE_RO_ATTR(lost_found, FEAT_LOST_FOUND); +F2FS_FEATURE_RO_ATTR(sb_checksum, FEAT_SB_CHECKSUM); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -493,6 +499,7 @@ static struct attribute *f2fs_feat_attrs[] = { ATTR_LIST(quota_ino), ATTR_LIST(inode_crtime), ATTR_LIST(lost_found), + ATTR_LIST(sb_checksum), NULL, }; diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 8b2b48bb53ff..a5a5bd94551e 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -109,7 +109,8 @@ struct f2fs_super_block { struct f2fs_device devs[MAX_DEVICES]; /* device list */ __le32 qf_ino[F2FS_MAX_QUOTAS]; /* quota inode numbers */ __u8 hot_ext_count; /* # of hot file extension */ - __u8 reserved[314]; /* valid reserved region */ + __u8 reserved[310]; /* valid reserved region */ + __le32 crc; /* checksum of superblock */ } __packed; /* From dbb428fbbb59fae08b01fc14d98d320d448877e0 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 27 Sep 2018 23:41:16 +0800 Subject: [PATCH 048/189] Revert: "f2fs: check last page index in cached bio to decide submission" There is one case that we can leave bio in f2fs, result in hanging page writeback waiter. Thread A Thread B - f2fs_write_cache_pages - f2fs_submit_page_write page #0 cached in bio #0 of cold log - f2fs_submit_page_write page #1 cached in bio #1 of warm log - f2fs_write_cache_pages - f2fs_submit_page_write bio is full, submit bio #1 contain page #1 - f2fs_submit_merged_write_cond(, page #1) fail to submit bio #0 due to page #1 is not in any cached bios. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 3 +-- fs/f2fs/data.c | 38 +++++++++++++++++++------------------- fs/f2fs/f2fs.h | 4 ++-- fs/f2fs/node.c | 11 +++++------ fs/f2fs/segment.c | 11 +++++------ 5 files changed, 32 insertions(+), 35 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 6c24f543e2d8..96ad2358ddb2 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -277,8 +277,7 @@ static int __f2fs_write_meta_page(struct page *page, dec_page_count(sbi, F2FS_DIRTY_META); if (wbc->for_reclaim) - f2fs_submit_merged_write_cond(sbi, page->mapping->host, - 0, page->index, META); + f2fs_submit_merged_write_cond(sbi, NULL, page, 0, META); unlock_page(page); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 739417e335ae..71d7d929f324 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -319,8 +319,8 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) io->bio = NULL; } -static bool __has_merged_page(struct f2fs_bio_info *io, - struct inode *inode, nid_t ino, pgoff_t idx) +static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode, + struct page *page, nid_t ino) { struct bio_vec *bvec; struct page *target; @@ -329,7 +329,7 @@ static bool __has_merged_page(struct f2fs_bio_info *io, if (!io->bio) return false; - if (!inode && !ino) + if (!inode && !page && !ino) return true; bio_for_each_segment_all(bvec, io->bio, i) { @@ -339,11 +339,10 @@ static bool __has_merged_page(struct f2fs_bio_info *io, else target = fscrypt_control_page(bvec->bv_page); - if (idx != target->index) - continue; - if (inode && inode == target->mapping->host) return true; + if (page && page == target) + return true; if (ino && ino == ino_of_node(target)) return true; } @@ -352,7 +351,8 @@ static bool __has_merged_page(struct f2fs_bio_info *io, } static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode, - nid_t ino, pgoff_t idx, enum page_type type) + struct page *page, nid_t ino, + enum page_type type) { enum page_type btype = PAGE_TYPE_OF_BIO(type); enum temp_type temp; @@ -363,7 +363,7 @@ static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode, io = sbi->write_io[btype] + temp; down_read(&io->io_rwsem); - ret = __has_merged_page(io, inode, ino, idx); + ret = __has_merged_page(io, inode, page, ino); up_read(&io->io_rwsem); /* TODO: use HOT temp only for meta pages now. */ @@ -394,12 +394,12 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi, } static void __submit_merged_write_cond(struct f2fs_sb_info *sbi, - struct inode *inode, nid_t ino, pgoff_t idx, - enum page_type type, bool force) + struct inode *inode, struct page *page, + nid_t ino, enum page_type type, bool force) { enum temp_type temp; - if (!force && !has_merged_page(sbi, inode, ino, idx, type)) + if (!force && !has_merged_page(sbi, inode, page, ino, type)) return; for (temp = HOT; temp < NR_TEMP_TYPE; temp++) { @@ -418,10 +418,10 @@ void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type) } void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, - struct inode *inode, nid_t ino, pgoff_t idx, - enum page_type type) + struct inode *inode, struct page *page, + nid_t ino, enum page_type type) { - __submit_merged_write_cond(sbi, inode, ino, idx, type, false); + __submit_merged_write_cond(sbi, inode, page, ino, type, false); } void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi) @@ -1948,7 +1948,7 @@ out: ClearPageUptodate(page); if (wbc->for_reclaim) { - f2fs_submit_merged_write_cond(sbi, inode, 0, page->index, DATA); + f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA); clear_inode_flag(inode, FI_HOT_DATA); f2fs_remove_dirty_inode(inode); submitted = NULL; @@ -2006,10 +2006,10 @@ static int f2fs_write_cache_pages(struct address_space *mapping, pgoff_t index; pgoff_t end; /* Inclusive */ pgoff_t done_index; - pgoff_t last_idx = ULONG_MAX; int cycled; int range_whole = 0; int tag; + int nwritten = 0; pagevec_init(&pvec, 0); @@ -2112,7 +2112,7 @@ continue_unlock: done = 1; break; } else if (submitted) { - last_idx = page->index; + nwritten++; } if (--wbc->nr_to_write <= 0 && @@ -2134,9 +2134,9 @@ continue_unlock: if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) mapping->writeback_index = done_index; - if (last_idx != ULONG_MAX) + if (nwritten) f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host, - 0, last_idx, DATA); + NULL, 0, DATA); return ret; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4810e05d3173..493188ef3023 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3117,8 +3117,8 @@ int f2fs_init_post_read_processing(void); void f2fs_destroy_post_read_processing(void); void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type); void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, - struct inode *inode, nid_t ino, pgoff_t idx, - enum page_type type); + struct inode *inode, struct page *page, + nid_t ino, enum page_type type); void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi); int f2fs_submit_page_bio(struct f2fs_io_info *fio); void f2fs_submit_page_write(struct f2fs_io_info *fio); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index bde71a04cf95..c1936432154f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1565,8 +1565,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, up_read(&sbi->node_write); if (wbc->for_reclaim) { - f2fs_submit_merged_write_cond(sbi, page->mapping->host, 0, - page->index, NODE); + f2fs_submit_merged_write_cond(sbi, NULL, page, 0, NODE); submitted = NULL; } @@ -1631,13 +1630,13 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, unsigned int *seq_id) { pgoff_t index; - pgoff_t last_idx = ULONG_MAX; struct pagevec pvec; int ret = 0; struct page *last_page = NULL; bool marked = false; nid_t ino = inode->i_ino; int nr_pages; + int nwritten = 0; if (atomic) { last_page = last_fsync_dnode(sbi, ino); @@ -1715,7 +1714,7 @@ continue_unlock: f2fs_put_page(last_page, 0); break; } else if (submitted) { - last_idx = page->index; + nwritten++; } if (page == last_page) { @@ -1741,8 +1740,8 @@ continue_unlock: goto retry; } out: - if (last_idx != ULONG_MAX) - f2fs_submit_merged_write_cond(sbi, NULL, ino, last_idx, NODE); + if (nwritten) + f2fs_submit_merged_write_cond(sbi, NULL, NULL, ino, NODE); return ret ? -EIO: 0; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 841ba43fc02b..5dd7d7cecabf 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -371,7 +371,7 @@ static int __f2fs_commit_inmem_pages(struct inode *inode) .io_type = FS_DATA_IO, }; struct list_head revoke_list; - pgoff_t last_idx = ULONG_MAX; + bool submit_bio = false; int err = 0; INIT_LIST_HEAD(&revoke_list); @@ -406,14 +406,14 @@ retry: } /* record old blkaddr for revoking */ cur->old_addr = fio.old_blkaddr; - last_idx = page->index; + submit_bio = true; } unlock_page(page); list_move_tail(&cur->list, &revoke_list); } - if (last_idx != ULONG_MAX) - f2fs_submit_merged_write_cond(sbi, inode, 0, last_idx, DATA); + if (submit_bio) + f2fs_submit_merged_write_cond(sbi, inode, NULL, 0, DATA); if (err) { /* @@ -3262,8 +3262,7 @@ void f2fs_wait_on_page_writeback(struct page *page, if (PageWriteback(page)) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); - f2fs_submit_merged_write_cond(sbi, page->mapping->host, - 0, page->index, type); + f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type); if (ordered) wait_on_page_writeback(page); else From a84324083292e3c923fad16668f5039f4d4af5c4 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 27 Sep 2018 18:33:18 +0800 Subject: [PATCH 049/189] f2fs: refactor ->page_mkwrite() flow Thread A Thread B - f2fs_vm_page_mkwrite - f2fs_setattr - down_write(i_mmap_sem) - truncate_setsize - f2fs_truncate - up_write(i_mmap_sem) - f2fs_reserve_block reserve NEW_ADDR - skip dirty page due to truncation 1. we don't need to rserve new block address for a truncated page. 2. dn.data_blkaddr is used out of node page lock coverage. Refactor ->page_mkwrite() flow to fix above issues: - use __do_map_lock() to avoid racing checkpoint() - lock data page in prior to dnode page - cover f2fs_reserve_block with i_mmap_sem lock - wait page writeback before zeroing page Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 45 ++++++++++++++++++++++----------------------- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 71d7d929f324..586d87a2697a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -969,7 +969,7 @@ map_blocks: return err; } -static inline void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock) +void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock) { if (flag == F2FS_GET_BLOCK_PRE_AIO) { if (lock) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 493188ef3023..79295c280c83 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3140,6 +3140,7 @@ struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index, struct page *f2fs_get_new_data_page(struct inode *inode, struct page *ipage, pgoff_t index, bool new_i_size); int f2fs_do_write_data_page(struct f2fs_io_info *fio); +void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock); int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int create, int flag); int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ce0b32031294..d83e8dad12e9 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -50,7 +50,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page = vmf->page; struct inode *inode = file_inode(vma->vm_file); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct dnode_of_data dn; + struct dnode_of_data dn = { .node_changed = false }; int err; if (unlikely(f2fs_cp_error(sbi))) { @@ -62,19 +62,6 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); - /* block allocation */ - f2fs_lock_op(sbi); - set_new_dnode(&dn, inode, NULL, NULL, 0); - err = f2fs_reserve_block(&dn, page->index); - if (err) { - f2fs_unlock_op(sbi); - goto out; - } - f2fs_put_dnode(&dn); - f2fs_unlock_op(sbi); - - f2fs_balance_fs(sbi, dn.node_changed); - file_update_time(vma->vm_file); down_read(&F2FS_I(inode)->i_mmap_sem); lock_page(page); @@ -86,11 +73,28 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, goto out_sem; } + /* block allocation */ + __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = f2fs_get_block(&dn, page->index); + f2fs_put_dnode(&dn); + __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); + if (err) { + unlock_page(page); + goto out_sem; + } + + /* fill the page */ + f2fs_wait_on_page_writeback(page, DATA, false); + + /* wait for GCed page writeback via META_MAPPING */ + f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); + /* * check to see if the page is mapped already (no holes) */ if (PageMappedToDisk(page)) - goto mapped; + goto out_sem; /* page is wholly or partially inside EOF */ if (((loff_t)(page->index + 1) << PAGE_SHIFT) > @@ -107,16 +111,11 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, f2fs_update_iostat(sbi, APP_MAPPED_IO, F2FS_BLKSIZE); trace_f2fs_vm_page_mkwrite(page, DATA); -mapped: - /* fill the page */ - f2fs_wait_on_page_writeback(page, DATA, false); - - /* wait for GCed page writeback via META_MAPPING */ - f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); - out_sem: up_read(&F2FS_I(inode)->i_mmap_sem); -out: + + f2fs_balance_fs(sbi, dn.node_changed); + sb_end_pagefault(inode->i_sb); f2fs_update_time(sbi, REQ_TIME); err: From 511f52d02f05dbefb355deed05632cc6f9ce9a19 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 27 Sep 2018 18:34:52 +0800 Subject: [PATCH 050/189] f2fs: allow out-place-update for direct IO in LFS mode Normally, DIO uses in-pllace-update, but in LFS mode, f2fs doesn't allow triggering any in-place-update writes, so we fallback direct write to buffered write, result in bad performance of large size write. This patch adds to support triggering out-place-update for direct IO to enhance its performance. Note that it needs to exclude direct read IO during direct write, since new data writing to new block address will no be valid until write finished. storage: zram time xfs_io -f -d /mnt/f2fs/file -c "pwrite 0 1073741824" -c "fsync" Before: real 0m13.061s user 0m0.327s sys 0m12.486s After: real 0m6.448s user 0m0.228s sys 0m6.212s Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 44 +++++++++++++++++++++++++++++++++++--------- fs/f2fs/f2fs.h | 45 +++++++++++++++++++++++++++++++++++++++++---- fs/f2fs/file.c | 3 ++- 3 files changed, 78 insertions(+), 14 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 586d87a2697a..41fc5ff29f70 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -890,7 +890,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) dn->data_blkaddr = datablock_addr(dn->inode, dn->node_page, dn->ofs_in_node); - if (dn->data_blkaddr == NEW_ADDR) + if (dn->data_blkaddr != NULL_ADDR) goto alloc; if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count)))) @@ -944,7 +944,7 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) if (direct_io) { map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint); - flag = f2fs_force_buffered_io(inode, WRITE) ? + flag = f2fs_force_buffered_io(inode, iocb, from) ? F2FS_GET_BLOCK_PRE_AIO : F2FS_GET_BLOCK_PRE_DIO; goto map_blocks; @@ -1063,7 +1063,15 @@ next_block: goto sync_out; } - if (!is_valid_data_blkaddr(sbi, blkaddr)) { + if (is_valid_data_blkaddr(sbi, blkaddr)) { + /* use out-place-update for driect IO under LFS mode */ + if (test_opt(sbi, LFS) && create && + flag == F2FS_GET_BLOCK_DIO) { + err = __allocate_data_block(&dn, map->m_seg_type); + if (!err) + set_inode_flag(inode, FI_APPEND_WRITE); + } + } else { if (create) { if (unlikely(f2fs_cp_error(sbi))) { err = -EIO; @@ -2483,35 +2491,52 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, struct address_space *mapping = iocb->ki_filp->f_mapping; struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_inode_info *fi = F2FS_I(inode); size_t count = iov_iter_count(iter); int rw = iov_iter_rw(iter); int err; enum rw_hint hint = iocb->ki_hint; int whint_mode = F2FS_OPTION(sbi).whint_mode; + bool do_opu; err = check_direct_IO(inode, iter, offset); if (err) return err < 0 ? err : 0; - if (f2fs_force_buffered_io(inode, rw)) + if (f2fs_force_buffered_io(inode, iocb, iter)) return 0; + do_opu = allow_outplace_dio(inode, iocb, iter); + trace_f2fs_direct_IO_enter(inode, offset, count, rw); if (rw == WRITE && whint_mode == WHINT_MODE_OFF) iocb->ki_hint = WRITE_LIFE_NOT_SET; - if (!down_read_trylock(&F2FS_I(inode)->i_gc_rwsem[rw])) { - if (iocb->ki_flags & IOCB_NOWAIT) { + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!down_read_trylock(&fi->i_gc_rwsem[rw])) { iocb->ki_hint = hint; err = -EAGAIN; goto out; } - down_read(&F2FS_I(inode)->i_gc_rwsem[rw]); + if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) { + up_read(&fi->i_gc_rwsem[rw]); + iocb->ki_hint = hint; + err = -EAGAIN; + goto out; + } + } else { + down_read(&fi->i_gc_rwsem[rw]); + if (do_opu) + down_read(&fi->i_gc_rwsem[READ]); } err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block_dio); - up_read(&F2FS_I(inode)->i_gc_rwsem[rw]); + + if (do_opu) + up_read(&fi->i_gc_rwsem[READ]); + + up_read(&fi->i_gc_rwsem[rw]); if (rw == WRITE) { if (whint_mode == WHINT_MODE_OFF) @@ -2519,7 +2544,8 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, if (err > 0) { f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO, err); - set_inode_flag(inode, FI_UPDATE_WRITE); + if (!do_opu) + set_inode_flag(inode, FI_UPDATE_WRITE); } else if (err < 0) { f2fs_write_failed(mapping, offset + count); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 79295c280c83..64be3fa73043 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -8,6 +8,7 @@ #ifndef _LINUX_F2FS_H #define _LINUX_F2FS_H +#include #include #include #include @@ -3576,11 +3577,47 @@ static inline bool f2fs_may_encrypt(struct inode *inode) #endif } -static inline bool f2fs_force_buffered_io(struct inode *inode, int rw) +static inline int block_unaligned_IO(struct inode *inode, + struct kiocb *iocb, struct iov_iter *iter) { - return (f2fs_post_read_required(inode) || - (rw == WRITE && test_opt(F2FS_I_SB(inode), LFS)) || - F2FS_I_SB(inode)->s_ndevs); + unsigned int i_blkbits = READ_ONCE(inode->i_blkbits); + unsigned int blocksize_mask = (1 << i_blkbits) - 1; + loff_t offset = iocb->ki_pos; + unsigned long align = offset | iov_iter_alignment(iter); + + return align & blocksize_mask; +} + +static inline int allow_outplace_dio(struct inode *inode, + struct kiocb *iocb, struct iov_iter *iter) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + int rw = iov_iter_rw(iter); + + return (test_opt(sbi, LFS) && (rw == WRITE) && + !block_unaligned_IO(inode, iocb, iter)); +} + +static inline bool f2fs_force_buffered_io(struct inode *inode, + struct kiocb *iocb, struct iov_iter *iter) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + int rw = iov_iter_rw(iter); + + if (f2fs_post_read_required(inode)) + return true; + if (sbi->s_ndevs) + return true; + /* + * for blkzoned device, fallback direct IO to buffered IO, so + * all IOs can be serialized by log-structured write. + */ + if (f2fs_sb_has_blkzoned(sbi->sb)) + return true; + if (test_opt(sbi, LFS) && (rw == WRITE) && + block_unaligned_IO(inode, iocb, iter)) + return true; + return false; } #ifdef CONFIG_F2FS_FAULT_INJECTION diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d83e8dad12e9..9c8d2cb4585e 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2795,7 +2795,8 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (!f2fs_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from)) || f2fs_has_inline_data(inode) || - f2fs_force_buffered_io(inode, WRITE)) { + f2fs_force_buffered_io(inode, + iocb, from)) { clear_inode_flag(inode, FI_NO_PREALLOC); inode_unlock(inode); From bb423cfd60a4c6f62707e55e213a79e8d0f2bfd8 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 25 Sep 2018 13:54:33 -0700 Subject: [PATCH 051/189] f2fs: clear PageError on the read path When running fault injection test, I hit somewhat wrong behavior in f2fs_gc -> gc_data_segment(): 0. fault injection generated some PageError'ed pages 1. gc_data_segment -> f2fs_get_read_data_page(REQ_RAHEAD) 2. move_data_page -> f2fs_get_lock_data_page() -> f2f_get_read_data_page() -> f2fs_submit_page_read() -> submit_bio(READ) -> return EIO due to PageError -> fail to move data Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 41fc5ff29f70..a6daf54c331e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -76,7 +76,8 @@ static void __read_end_io(struct bio *bio) /* PG_error was set if any post_read step failed */ if (bio->bi_error || PageError(page)) { ClearPageUptodate(page); - SetPageError(page); + /* will re-read again later */ + ClearPageError(page); } else { SetPageUptodate(page); } @@ -588,6 +589,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, bio_put(bio); return -EFAULT; } + ClearPageError(page); __submit_bio(F2FS_I_SB(inode), bio, DATA); return 0; } @@ -1575,6 +1577,7 @@ submit_and_realloc: if (bio_add_page(bio, page, blocksize, 0) < blocksize) goto submit_and_realloc; + ClearPageError(page); last_block_in_bio = block_nr; goto next_page; set_error_page: From 99efe50ef6cf0ba523ee32274fe0d58bc16e175f Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Mon, 20 Aug 2018 19:21:43 -0700 Subject: [PATCH 052/189] f2fs: checkpoint disabling Note that, it requires "f2fs: return correct errno in f2fs_gc". This adds a lightweight non-persistent snapshotting scheme to f2fs. To use, mount with the option checkpoint=disable, and to return to normal operation, remount with checkpoint=enable. If the filesystem is shut down before remounting with checkpoint=enable, it will revert back to its apparent state when it was first mounted with checkpoint=disable. This is useful for situations where you wish to be able to roll back the state of the disk in case of some critical failure. Signed-off-by: Daniel Rosenberg [Jaegeuk Kim: use SB_RDONLY instead of MS_RDONLY] Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 5 ++ fs/f2fs/checkpoint.c | 12 +++ fs/f2fs/data.c | 14 +++- fs/f2fs/debug.c | 3 +- fs/f2fs/f2fs.h | 18 ++++- fs/f2fs/file.c | 12 ++- fs/f2fs/gc.c | 9 ++- fs/f2fs/inode.c | 6 +- fs/f2fs/namei.c | 19 +++++ fs/f2fs/segment.c | 98 ++++++++++++++++++++++- fs/f2fs/segment.h | 15 ++++ fs/f2fs/super.c | 124 ++++++++++++++++++++++++++++- include/linux/f2fs_fs.h | 1 + 13 files changed, 323 insertions(+), 13 deletions(-) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index c32641476f5e..348d80440433 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -212,6 +212,11 @@ fsync_mode=%s Control the policy of fsync. Currently supports "posix", non-atomic files likewise "nobarrier" mount option. test_dummy_encryption Enable dummy encryption, which provides a fake fscrypt context. The fake fscrypt context is used by xfstests. +checkpoint=%s Set to "disable" to turn off checkpointing. Set to "enable" + to reenable checkpointing. Is enabled by default. While + disabled, any unmounting or unexpected shutdowns will cause + the filesystem contents to appear as they did when the + filesystem was mounted with that option. ================================================================================ DEBUGFS ENTRIES diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 96ad2358ddb2..598424c1c4b1 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1211,6 +1211,11 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) __set_ckpt_flags(ckpt, CP_FSCK_FLAG); + if (is_sbi_flag_set(sbi, SBI_CP_DISABLED)) + __set_ckpt_flags(ckpt, CP_DISABLED_FLAG); + else + __clear_ckpt_flags(ckpt, CP_DISABLED_FLAG); + /* set this flag to activate crc|cp_ver for recovery */ __set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG); __clear_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG); @@ -1418,6 +1423,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) clear_sbi_flag(sbi, SBI_IS_DIRTY); clear_sbi_flag(sbi, SBI_NEED_CP); + sbi->unusable_block_count = 0; __set_cp_next_pack(sbi); /* @@ -1442,6 +1448,12 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) unsigned long long ckpt_ver; int err = 0; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + if (cpc->reason != CP_PAUSE) + return 0; + f2fs_msg(sbi->sb, KERN_WARNING, + "Start checkpoint disabled!"); + } mutex_lock(&sbi->cp_mutex); if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a6daf54c331e..cdfdd6919dcc 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -534,7 +534,8 @@ skip: if (fio->in_list) goto next; out: - if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)) + if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) || + f2fs_is_checkpoint_ready(sbi)) __submit_merged_bio(io); up_write(&io->io_rwsem); } @@ -1699,6 +1700,10 @@ static inline bool check_inplace_update_policy(struct inode *inode, is_inode_flag_set(inode, FI_NEED_IPU)) return true; + if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) && + !f2fs_is_checkpointed_data(sbi, fio->old_blkaddr))) + return true; + return false; } @@ -1729,6 +1734,9 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) return true; if (IS_ATOMIC_WRITTEN_PAGE(fio->page)) return true; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) && + f2fs_is_checkpointed_data(sbi, fio->old_blkaddr))) + return true; } return false; } @@ -2349,6 +2357,10 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, trace_f2fs_write_begin(inode, pos, len, flags); + err = f2fs_is_checkpoint_ready(sbi); + if (err) + goto fail; + if ((f2fs_is_atomic_file(inode) && !f2fs_available_free_memory(sbi, INMEM_PAGES)) || is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) { diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 75bc62edc4c1..026e10f30889 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -272,7 +272,8 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, "\n=====[ partition info(%pg). #%d, %s, CP: %s]=====\n", si->sbi->sb->s_bdev, i++, f2fs_readonly(si->sbi->sb) ? "RO": "RW", - f2fs_cp_error(si->sbi) ? "Error": "Good"); + is_set_ckpt_flags(si->sbi, CP_DISABLED_FLAG) ? + "Disabled": (f2fs_cp_error(si->sbi) ? "Error": "Good")); seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ", si->sit_area_segs, si->nat_area_segs); seq_printf(s, "[SSA: %d] [MAIN: %d", diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 64be3fa73043..646012a70f4e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -101,6 +101,7 @@ extern char *f2fs_fault_name[FAULT_MAX]; #define F2FS_MOUNT_QUOTA 0x00400000 #define F2FS_MOUNT_INLINE_XATTR_SIZE 0x00800000 #define F2FS_MOUNT_RESERVE_ROOT 0x01000000 +#define F2FS_MOUNT_DISABLE_CHECKPOINT 0x02000000 #define F2FS_OPTION(sbi) ((sbi)->mount_opt) #define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option) @@ -246,6 +247,7 @@ enum { #define CP_RECOVERY 0x00000008 #define CP_DISCARD 0x00000010 #define CP_TRIMMED 0x00000020 +#define CP_PAUSE 0x00000040 #define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi) #define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */ @@ -255,6 +257,7 @@ enum { #define DEF_DISCARD_URGENT_UTIL 80 /* do more discard over 80% */ #define DEF_CP_INTERVAL 60 /* 60 secs */ #define DEF_IDLE_INTERVAL 5 /* 5 secs */ +#define DEF_DISABLE_INTERVAL 5 /* 5 secs */ struct cp_control { int reason; @@ -1157,6 +1160,7 @@ enum { SBI_NEED_CP, /* need to checkpoint */ SBI_IS_SHUTDOWN, /* shutdown by ioctl */ SBI_IS_RECOVERED, /* recovered orphan/data */ + SBI_CP_DISABLED, /* CP was disabled last mount */ }; enum { @@ -1164,6 +1168,7 @@ enum { REQ_TIME, DISCARD_TIME, GC_TIME, + DISABLE_TIME, MAX_TIME, }; @@ -1290,6 +1295,9 @@ struct f2fs_sb_info { block_t reserved_blocks; /* configurable reserved blocks */ block_t current_reserved_blocks; /* current reserved blocks */ + /* Additional tracking for no checkpoint mode */ + block_t unusable_block_count; /* # of blocks saved by last cp */ + unsigned int nquota_files; /* # of quota sysfile */ u32 s_next_generation; /* for NFS support */ @@ -1800,7 +1808,8 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, if (!__allow_reserved_blocks(sbi, inode, true)) avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks; - + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + avail_user_block_count -= sbi->unusable_block_count; if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) { diff = sbi->total_valid_block_count - avail_user_block_count; if (diff > *count) @@ -2007,6 +2016,8 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, if (!__allow_reserved_blocks(sbi, inode, false)) valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + valid_block_count += sbi->unusable_block_count; if (unlikely(valid_block_count > sbi->user_block_count)) { spin_unlock(&sbi->stat_lock); @@ -3030,6 +3041,8 @@ void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi); bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi); void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc); +void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi); +int f2fs_disable_cp_again(struct f2fs_sb_info *sbi); void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi); int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi); @@ -3617,6 +3630,9 @@ static inline bool f2fs_force_buffered_io(struct inode *inode, if (test_opt(sbi, LFS) && (rw == WRITE) && block_unaligned_IO(inode, iocb, iter)) return true; + if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED)) + return true; + return false; } diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 9c8d2cb4585e..834f87e75dff 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -213,7 +213,8 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, }; unsigned int seq_id = 0; - if (unlikely(f2fs_readonly(inode->i_sb))) + if (unlikely(f2fs_readonly(inode->i_sb) || + is_sbi_flag_set(sbi, SBI_CP_DISABLED))) return 0; trace_f2fs_sync_file_enter(inode); @@ -2157,6 +2158,12 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg) if (f2fs_readonly(sbi->sb)) return -EROFS; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + f2fs_msg(sbi->sb, KERN_INFO, + "Skipping Checkpoint. Checkpoints currently disabled."); + return -EINVAL; + } + ret = mnt_want_write_file(filp); if (ret) return ret; @@ -2528,6 +2535,9 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) if (f2fs_readonly(sbi->sb)) return -EROFS; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + return -EINVAL; + if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg, sizeof(range))) return -EFAULT; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 4db16f7c6858..6cdcd79baeb7 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -370,6 +370,10 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, if (sec_usage_check(sbi, secno)) goto next; + /* Don't touch checkpointed data */ + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) && + get_ckpt_valid_blocks(sbi, segno))) + goto next; if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap)) goto next; @@ -1189,7 +1193,8 @@ gc_more: * threshold, we can make them free by checkpoint. Then, we * secure free segments which doesn't need fggc any more. */ - if (prefree_segments(sbi)) { + if (prefree_segments(sbi) && + !is_sbi_flag_set(sbi, SBI_CP_DISABLED)) { ret = f2fs_write_checkpoint(sbi, &cpc); if (ret) goto stop; @@ -1241,7 +1246,7 @@ gc_more: segno = NULL_SEGNO; goto gc_more; } - if (gc_type == FG_GC) + if (gc_type == FG_GC && !is_sbi_flag_set(sbi, SBI_CP_DISABLED)) ret = f2fs_write_checkpoint(sbi, &cpc); } stop: diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 63e88eac5db3..ab845b558dd5 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -607,6 +607,9 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) if (!is_inode_flag_set(inode, FI_DIRTY_INODE)) return 0; + if (f2fs_is_checkpoint_ready(sbi)) + return -ENOSPC; + /* * We need to balance fs here to prevent from producing dirty node pages * during the urgent cleaning time when runing out of free sections. @@ -688,7 +691,8 @@ no_delete: stat_dec_inline_dir(inode); stat_dec_inline_inode(inode); - if (likely(!is_set_ckpt_flags(sbi, CP_ERROR_FLAG))) + if (likely(!is_set_ckpt_flags(sbi, CP_ERROR_FLAG) && + !is_sbi_flag_set(sbi, SBI_CP_DISABLED))) f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE)); else f2fs_inode_synced(inode); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index f85241dc8140..b2cc6c315f62 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -16,6 +16,7 @@ #include "f2fs.h" #include "node.h" +#include "segment.h" #include "xattr.h" #include "acl.h" #include @@ -269,6 +270,9 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, if (unlikely(f2fs_cp_error(sbi))) return -EIO; + err = f2fs_is_checkpoint_ready(sbi); + if (err) + return err; err = dquot_initialize(dir); if (err) @@ -316,6 +320,9 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, if (unlikely(f2fs_cp_error(sbi))) return -EIO; + err = f2fs_is_checkpoint_ready(sbi); + if (err) + return err; err = fscrypt_prepare_link(old_dentry, dir, dentry); if (err) @@ -559,6 +566,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, if (unlikely(f2fs_cp_error(sbi))) return -EIO; + err = f2fs_is_checkpoint_ready(sbi); + if (err) + return err; err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize, &disk_link); @@ -690,6 +700,9 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, if (unlikely(f2fs_cp_error(sbi))) return -EIO; + err = f2fs_is_checkpoint_ready(sbi); + if (err) + return err; err = dquot_initialize(dir); if (err) @@ -825,6 +838,9 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (unlikely(f2fs_cp_error(sbi))) return -EIO; + err = f2fs_is_checkpoint_ready(sbi); + if (err) + return err; if (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && (!projid_eq(F2FS_I(new_dir)->i_projid, @@ -1015,6 +1031,9 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, if (unlikely(f2fs_cp_error(sbi))) return -EIO; + err = f2fs_is_checkpoint_ready(sbi); + if (err) + return err; if ((is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && !projid_eq(F2FS_I(new_dir)->i_projid, diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 5dd7d7cecabf..1b2ebd9b986d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -176,6 +176,8 @@ bool f2fs_need_SSR(struct f2fs_sb_info *sbi) return false; if (sbi->gc_mode == GC_URGENT) return true; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + return true; return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs + SM_I(sbi)->min_ssr_sections + reserved_sections(sbi)); @@ -480,6 +482,9 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) if (need && excess_cached_nats(sbi)) f2fs_balance_fs_bg(sbi); + if (f2fs_is_checkpoint_ready(sbi)) + return; + /* * We should do GC or end up with checkpoint, if there are so many dirty * dir/node pages without enough free segments. @@ -796,7 +801,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - unsigned short valid_blocks; + unsigned short valid_blocks, ckpt_valid_blocks; if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno)) return; @@ -804,8 +809,10 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) mutex_lock(&dirty_i->seglist_lock); valid_blocks = get_valid_blocks(sbi, segno, false); + ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno); - if (valid_blocks == 0) { + if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) || + ckpt_valid_blocks == sbi->blocks_per_seg)) { __locate_dirty_segment(sbi, segno, PRE); __remove_dirty_segment(sbi, segno, DIRTY); } else if (valid_blocks < sbi->blocks_per_seg) { @@ -818,6 +825,66 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) mutex_unlock(&dirty_i->seglist_lock); } +/* This moves currently empty dirty blocks to prefree. Must hold seglist_lock */ +void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + unsigned int segno; + + mutex_lock(&dirty_i->seglist_lock); + for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) { + if (get_valid_blocks(sbi, segno, false)) + continue; + if (IS_CURSEG(sbi, segno)) + continue; + __locate_dirty_segment(sbi, segno, PRE); + __remove_dirty_segment(sbi, segno, DIRTY); + } + mutex_unlock(&dirty_i->seglist_lock); +} + +int f2fs_disable_cp_again(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + block_t ovp = overprovision_segments(sbi) << sbi->log_blocks_per_seg; + block_t holes[2] = {0, 0}; /* DATA and NODE */ + struct seg_entry *se; + unsigned int segno; + + mutex_lock(&dirty_i->seglist_lock); + for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) { + se = get_seg_entry(sbi, segno); + if (IS_NODESEG(se->type)) + holes[NODE] += sbi->blocks_per_seg - se->valid_blocks; + else + holes[DATA] += sbi->blocks_per_seg - se->valid_blocks; + } + mutex_unlock(&dirty_i->seglist_lock); + + if (holes[DATA] > ovp || holes[NODE] > ovp) + return -EAGAIN; + return 0; +} + +/* This is only used by SBI_CP_DISABLED */ +static unsigned int get_free_segment(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + unsigned int segno = 0; + + mutex_lock(&dirty_i->seglist_lock); + for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) { + if (get_valid_blocks(sbi, segno, false)) + continue; + if (get_ckpt_valid_blocks(sbi, segno)) + continue; + mutex_unlock(&dirty_i->seglist_lock); + return segno; + } + mutex_unlock(&dirty_i->seglist_lock); + return NULL_SEGNO; +} + static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t lstart, block_t start, block_t len) @@ -2108,7 +2175,8 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) sbi->discard_blks--; /* don't overwrite by SSR to keep node chain */ - if (IS_NODESEG(se->type)) { + if (IS_NODESEG(se->type) && + !is_sbi_flag_set(sbi, SBI_CP_DISABLED)) { if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map)) se->ckpt_valid_blocks++; } @@ -2130,6 +2198,15 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) f2fs_bug_on(sbi, 1); se->valid_blocks++; del = 0; + } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + /* + * If checkpoints are off, we must not reuse data that + * was used in the previous checkpoint. If it was used + * before, we must track that to know how much space we + * really have. + */ + if (f2fs_test_bit(offset, se->ckpt_valid_map)) + sbi->unusable_block_count++; } if (f2fs_test_and_clear_bit(offset, se->discard_map)) @@ -2412,6 +2489,9 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) if (sbi->segs_per_sec != 1) return CURSEG_I(sbi, type)->segno; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + return 0; + if (test_opt(sbi, NOHEAP) && (type == CURSEG_HOT_DATA || IS_NODESEG(type))) return 0; @@ -2556,6 +2636,15 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) return 1; } } + + /* find valid_blocks=0 in dirty list */ + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + segno = get_free_segment(sbi); + if (segno != NULL_SEGNO) { + curseg->next_segno = segno; + return 1; + } + } return 0; } @@ -2573,7 +2662,8 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) && type == CURSEG_WARM_NODE) new_curseg(sbi, type, false); - else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type)) + else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type) && + likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED))) new_curseg(sbi, type, false); else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type)) change_curseg(sbi, type); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 086150028c6d..ab3465faddf1 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -339,6 +339,12 @@ static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi, return get_seg_entry(sbi, segno)->valid_blocks; } +static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + return get_seg_entry(sbi, segno)->ckpt_valid_blocks; +} + static inline void seg_info_from_raw_sit(struct seg_entry *se, struct f2fs_sit_entry *rs) { @@ -576,6 +582,15 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, reserved_sections(sbi) + needed); } +static inline int f2fs_is_checkpoint_ready(struct f2fs_sb_info *sbi) +{ + if (likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + return 0; + if (likely(!has_not_enough_free_secs(sbi, 0, 0))) + return 0; + return -ENOSPC; +} + static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi) { return prefree_segments(sbi) > SM_I(sbi)->rec_prefree_segments; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index cca2ccbf18fe..41e0609e7e30 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -136,6 +136,7 @@ enum { Opt_alloc, Opt_fsync, Opt_test_dummy_encryption, + Opt_checkpoint, Opt_err, }; @@ -194,6 +195,7 @@ static match_table_t f2fs_tokens = { {Opt_alloc, "alloc_mode=%s"}, {Opt_fsync, "fsync_mode=%s"}, {Opt_test_dummy_encryption, "test_dummy_encryption"}, + {Opt_checkpoint, "checkpoint=%s"}, {Opt_err, NULL}, }; @@ -769,6 +771,23 @@ static int parse_options(struct super_block *sb, char *options) "Test dummy encryption mount option ignored"); #endif break; + case Opt_checkpoint: + name = match_strdup(&args[0]); + if (!name) + return -ENOMEM; + + if (strlen(name) == 6 && + !strncmp(name, "enable", 6)) { + clear_opt(sbi, DISABLE_CHECKPOINT); + } else if (strlen(name) == 7 && + !strncmp(name, "disable", 7)) { + set_opt(sbi, DISABLE_CHECKPOINT); + } else { + kfree(name); + return -EINVAL; + } + kfree(name); + break; default: f2fs_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" or missing value", @@ -827,6 +846,12 @@ static int parse_options(struct super_block *sb, char *options) } } + if (test_opt(sbi, DISABLE_CHECKPOINT) && test_opt(sbi, LFS)) { + f2fs_msg(sb, KERN_ERR, + "LFS not compatible with checkpoint=disable\n"); + return -EINVAL; + } + /* Not pass down write hints if the number of active logs is lesser * than NR_CURSEG_TYPE. */ @@ -1014,8 +1039,8 @@ static void f2fs_put_super(struct super_block *sb) * But, the previous checkpoint was not done by umount, it needs to do * clean checkpoint again. */ - if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) || - !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) { + if ((is_sbi_flag_set(sbi, SBI_IS_DIRTY) || + !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG))) { struct cp_control cpc = { .reason = CP_UMOUNT, }; @@ -1088,6 +1113,8 @@ int f2fs_sync_fs(struct super_block *sb, int sync) if (unlikely(f2fs_cp_error(sbi))) return 0; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) + return 0; trace_f2fs_sync_fs(sb, sync); @@ -1187,6 +1214,11 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_blocks = total_count - start_count; buf->f_bfree = user_block_count - valid_user_blocks(sbi) - sbi->current_reserved_blocks; + if (unlikely(buf->f_bfree <= sbi->unusable_block_count)) + buf->f_bfree = 0; + else + buf->f_bfree -= sbi->unusable_block_count; + if (buf->f_bfree > F2FS_OPTION(sbi).root_reserved_blocks) buf->f_bavail = buf->f_bfree - F2FS_OPTION(sbi).root_reserved_blocks; @@ -1366,6 +1398,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) else if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE) seq_printf(seq, ",alloc_mode=%s", "reuse"); + if (test_opt(sbi, DISABLE_CHECKPOINT)) + seq_puts(seq, ",checkpoint=disable"); + if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_POSIX) seq_printf(seq, ",fsync_mode=%s", "posix"); else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) @@ -1394,6 +1429,7 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, EXTENT_CACHE); set_opt(sbi, NOHEAP); sbi->sb->s_flags |= MS_LAZYTIME; + clear_opt(sbi, DISABLE_CHECKPOINT); set_opt(sbi, FLUSH_MERGE); set_opt(sbi, DISCARD); if (f2fs_sb_has_blkzoned(sbi->sb)) @@ -1414,6 +1450,57 @@ static void default_options(struct f2fs_sb_info *sbi) #ifdef CONFIG_QUOTA static int f2fs_enable_quotas(struct super_block *sb); #endif + +static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) +{ + struct cp_control cpc; + int err; + + sbi->sb->s_flags |= MS_ACTIVE; + + mutex_lock(&sbi->gc_mutex); + f2fs_update_time(sbi, DISABLE_TIME); + + while (!f2fs_time_over(sbi, DISABLE_TIME)) { + err = f2fs_gc(sbi, true, false, NULL_SEGNO); + if (err == -ENODATA) + break; + if (err && err != -EAGAIN) { + mutex_unlock(&sbi->gc_mutex); + return err; + } + } + mutex_unlock(&sbi->gc_mutex); + + err = sync_filesystem(sbi->sb); + if (err) + return err; + + if (f2fs_disable_cp_again(sbi)) + return -EAGAIN; + + mutex_lock(&sbi->gc_mutex); + cpc.reason = CP_PAUSE; + set_sbi_flag(sbi, SBI_CP_DISABLED); + f2fs_write_checkpoint(sbi, &cpc); + + sbi->unusable_block_count = 0; + mutex_unlock(&sbi->gc_mutex); + return 0; +} + +static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi) +{ + mutex_lock(&sbi->gc_mutex); + f2fs_dirty_to_prefree(sbi); + + clear_sbi_flag(sbi, SBI_CP_DISABLED); + set_sbi_flag(sbi, SBI_IS_DIRTY); + mutex_unlock(&sbi->gc_mutex); + + f2fs_sync_fs(sbi->sb, 1); +} + static int f2fs_remount(struct super_block *sb, int *flags, char *data) { struct f2fs_sb_info *sbi = F2FS_SB(sb); @@ -1423,6 +1510,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) bool need_restart_gc = false; bool need_stop_gc = false; bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE); + bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT); + bool checkpoint_changed; #ifdef CONFIG_QUOTA int i, j; #endif @@ -1467,6 +1556,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) err = parse_options(sb, data); if (err) goto restore_opts; + checkpoint_changed = + disable_checkpoint != test_opt(sbi, DISABLE_CHECKPOINT); /* * Previous and new state of filesystem is RO, @@ -1500,6 +1591,13 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } + if ((*flags & MS_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) { + err = -EINVAL; + f2fs_msg(sbi->sb, KERN_WARNING, + "disabling checkpoint not compatible with read-only"); + goto restore_opts; + } + /* * We stop the GC thread if FS is mounted as RO * or if background_gc = off is passed in mount @@ -1528,6 +1626,16 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) clear_sbi_flag(sbi, SBI_IS_CLOSE); } + if (checkpoint_changed) { + if (test_opt(sbi, DISABLE_CHECKPOINT)) { + err = f2fs_disable_checkpoint(sbi); + if (err) + goto restore_gc; + } else { + f2fs_enable_checkpoint(sbi); + } + } + /* * We stop issue flush thread if FS is mounted as RO * or if flush_merge is not passed in mount option. @@ -2489,6 +2597,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL; sbi->interval_time[DISCARD_TIME] = DEF_IDLE_INTERVAL; sbi->interval_time[GC_TIME] = DEF_IDLE_INTERVAL; + sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_INTERVAL; clear_sbi_flag(sbi, SBI_NEED_FSCK); for (i = 0; i < NR_COUNT_TYPE; i++) @@ -3100,6 +3209,9 @@ try_onemore: if (err) goto free_meta; + if (unlikely(is_set_ckpt_flags(sbi, CP_DISABLED_FLAG))) + goto skip_recovery; + /* recover fsynced data */ if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { /* @@ -3139,6 +3251,14 @@ skip_recovery: /* f2fs_recover_fsync_data() cleared this already */ clear_sbi_flag(sbi, SBI_POR_DOING); + if (test_opt(sbi, DISABLE_CHECKPOINT)) { + err = f2fs_disable_checkpoint(sbi); + if (err) + goto free_meta; + } else if (is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)) { + f2fs_enable_checkpoint(sbi); + } + /* * If filesystem is not mounted as read-only then * do start the gc_thread. diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index a5a5bd94551e..6c2786db5154 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -116,6 +116,7 @@ struct f2fs_super_block { /* * For checkpoint */ +#define CP_DISABLED_FLAG 0x00001000 #define CP_LARGE_NAT_BITMAP_FLAG 0x00000400 #define CP_NOCRC_RECOVERY_FLAG 0x00000200 #define CP_TRIMMED_FLAG 0x00000100 From 4bb9f775d5ec065c2ac04931839466a6eb34c05f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 13 Sep 2018 07:40:53 +0800 Subject: [PATCH 053/189] f2fs: submit cached bio to avoid endless PageWriteback When migrating encrypted block from background GC thread, we only add them into f2fs inner bio cache, but forget to submit the cached bio, it may cause potential deadlock when we are waiting page writebacked, fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/gc.c | 71 +++++++++++++++++++++++++++++++++++--------------- fs/f2fs/node.c | 13 ++++++--- 3 files changed, 61 insertions(+), 25 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 646012a70f4e..eb83794e9b38 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2996,7 +2996,7 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs); void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid); struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid); struct page *f2fs_get_node_page_ra(struct page *parent, int start); -void f2fs_move_node_page(struct page *node_page, int gc_type); +int f2fs_move_node_page(struct page *node_page, int gc_type); int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, struct writeback_control *wbc, bool atomic, unsigned int *seq_id); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 6cdcd79baeb7..843430b55a73 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -473,7 +473,7 @@ static int check_valid_map(struct f2fs_sb_info *sbi, * On validity, copy that node with cold status, otherwise (invalid node) * ignore that. */ -static void gc_node_segment(struct f2fs_sb_info *sbi, +static int gc_node_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, unsigned int segno, int gc_type) { struct f2fs_summary *entry; @@ -481,6 +481,7 @@ static void gc_node_segment(struct f2fs_sb_info *sbi, int off; int phase = 0; bool fggc = (gc_type == FG_GC); + int submitted = 0; start_addr = START_BLOCK(sbi, segno); @@ -494,10 +495,11 @@ next_step: nid_t nid = le32_to_cpu(entry->nid); struct page *node_page; struct node_info ni; + int err; /* stop BG_GC if there is not enough free sections. */ if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) - return; + return submitted; if (check_valid_map(sbi, segno, off) == 0) continue; @@ -534,7 +536,9 @@ next_step: continue; } - f2fs_move_node_page(node_page, gc_type); + err = f2fs_move_node_page(node_page, gc_type); + if (!err && gc_type == FG_GC) + submitted++; stat_inc_node_blk_count(sbi, 1, gc_type); } @@ -543,6 +547,7 @@ next_step: if (fggc) atomic_dec(&sbi->wb_sync_req[NODE]); + return submitted; } /* @@ -678,7 +683,7 @@ put_page: * Move data block via META_MAPPING while keeping locked data page. * This can be used to move blocks, aka LBAs, directly on disk. */ -static void move_data_block(struct inode *inode, block_t bidx, +static int move_data_block(struct inode *inode, block_t bidx, int gc_type, unsigned int segno, int off) { struct f2fs_io_info fio = { @@ -697,25 +702,29 @@ static void move_data_block(struct inode *inode, block_t bidx, struct node_info ni; struct page *page, *mpage; block_t newaddr; - int err; + int err = 0; bool lfs_mode = test_opt(fio.sbi, LFS); /* do not read out */ page = f2fs_grab_cache_page(inode->i_mapping, bidx, false); if (!page) - return; + return -ENOMEM; - if (!check_valid_map(F2FS_I_SB(inode), segno, off)) + if (!check_valid_map(F2FS_I_SB(inode), segno, off)) { + err = -ENOENT; goto out; + } if (f2fs_is_atomic_file(inode)) { F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC]++; F2FS_I_SB(inode)->skipped_atomic_files[gc_type]++; + err = -EAGAIN; goto out; } if (f2fs_is_pinned_file(inode)) { f2fs_pin_file_control(inode, true); + err = -EAGAIN; goto out; } @@ -726,6 +735,7 @@ static void move_data_block(struct inode *inode, block_t bidx, if (unlikely(dn.data_blkaddr == NULL_ADDR)) { ClearPageUptodate(page); + err = -ENOENT; goto put_out; } @@ -808,6 +818,7 @@ write_page: fio.new_blkaddr = newaddr; f2fs_submit_page_write(&fio); if (fio.retry) { + err = -EAGAIN; if (PageWriteback(fio.encrypted_page)) end_page_writeback(fio.encrypted_page); goto put_page_out; @@ -831,34 +842,42 @@ put_out: f2fs_put_dnode(&dn); out: f2fs_put_page(page, 1); + return err; } -static void move_data_page(struct inode *inode, block_t bidx, int gc_type, +static int move_data_page(struct inode *inode, block_t bidx, int gc_type, unsigned int segno, int off) { struct page *page; + int err = 0; page = f2fs_get_lock_data_page(inode, bidx, true); if (IS_ERR(page)) - return; + return PTR_ERR(page); - if (!check_valid_map(F2FS_I_SB(inode), segno, off)) + if (!check_valid_map(F2FS_I_SB(inode), segno, off)) { + err = -ENOENT; goto out; + } if (f2fs_is_atomic_file(inode)) { F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC]++; F2FS_I_SB(inode)->skipped_atomic_files[gc_type]++; + err = -EAGAIN; goto out; } if (f2fs_is_pinned_file(inode)) { if (gc_type == FG_GC) f2fs_pin_file_control(inode, true); + err = -EAGAIN; goto out; } if (gc_type == BG_GC) { - if (PageWriteback(page)) + if (PageWriteback(page)) { + err = -EAGAIN; goto out; + } set_page_dirty(page); set_cold_data(page); } else { @@ -876,7 +895,6 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type, .io_type = FS_GC_DATA_IO, }; bool is_dirty = PageDirty(page); - int err; retry: set_page_dirty(page); @@ -901,6 +919,7 @@ retry: } out: f2fs_put_page(page, 1); + return err; } /* @@ -910,7 +929,7 @@ out: * If the parent node is not valid or the data block address is different, * the victim data block is ignored. */ -static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, +static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, struct gc_inode_list *gc_list, unsigned int segno, int gc_type) { struct super_block *sb = sbi->sb; @@ -918,6 +937,7 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, block_t start_addr; int off; int phase = 0; + int submitted = 0; start_addr = START_BLOCK(sbi, segno); @@ -934,7 +954,7 @@ next_step: /* stop BG_GC if there is not enough free sections. */ if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) - return; + return submitted; if (check_valid_map(sbi, segno, off) == 0) continue; @@ -1006,6 +1026,7 @@ next_step: if (inode) { struct f2fs_inode_info *fi = F2FS_I(inode); bool locked = false; + int err; if (S_ISREG(inode->i_mode)) { if (!down_write_trylock(&fi->i_gc_rwsem[READ])) @@ -1025,12 +1046,16 @@ next_step: start_bidx = f2fs_start_bidx_of_node(nofs, inode) + ofs_in_node; if (f2fs_post_read_required(inode)) - move_data_block(inode, start_bidx, gc_type, - segno, off); + err = move_data_block(inode, start_bidx, + gc_type, segno, off); else - move_data_page(inode, start_bidx, gc_type, + err = move_data_page(inode, start_bidx, gc_type, segno, off); + if (!err && (gc_type == FG_GC || + f2fs_post_read_required(inode))) + submitted++; + if (locked) { up_write(&fi->i_gc_rwsem[WRITE]); up_write(&fi->i_gc_rwsem[READ]); @@ -1042,6 +1067,8 @@ next_step: if (++phase < 5) goto next_step; + + return submitted; } static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, @@ -1069,6 +1096,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, int seg_freed = 0; unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ? SUM_TYPE_DATA : SUM_TYPE_NODE; + int submitted = 0; /* readahead multi ssa blocks those have contiguous address */ if (sbi->segs_per_sec > 1) @@ -1124,10 +1152,11 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, * - lock_page(sum_page) */ if (type == SUM_TYPE_NODE) - gc_node_segment(sbi, sum->entries, segno, gc_type); - else - gc_data_segment(sbi, sum->entries, gc_list, segno, + submitted += gc_node_segment(sbi, sum->entries, segno, gc_type); + else + submitted += gc_data_segment(sbi, sum->entries, gc_list, + segno, gc_type); stat_inc_seg_count(sbi, type, gc_type); @@ -1138,7 +1167,7 @@ next: f2fs_put_page(sum_page, 0); } - if (gc_type == FG_GC) + if (submitted) f2fs_submit_merged_write(sbi, (type == SUM_TYPE_NODE) ? NODE : DATA); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index c1936432154f..b76dd5a06c29 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1587,8 +1587,10 @@ redirty_out: return AOP_WRITEPAGE_ACTIVATE; } -void f2fs_move_node_page(struct page *node_page, int gc_type) +int f2fs_move_node_page(struct page *node_page, int gc_type) { + int err = 0; + if (gc_type == FG_GC) { struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, @@ -1600,12 +1602,16 @@ void f2fs_move_node_page(struct page *node_page, int gc_type) f2fs_wait_on_page_writeback(node_page, NODE, true); f2fs_bug_on(F2FS_P_SB(node_page), PageWriteback(node_page)); - if (!clear_page_dirty_for_io(node_page)) + if (!clear_page_dirty_for_io(node_page)) { + err = -EAGAIN; goto out_page; + } if (__write_node_page(node_page, false, NULL, - &wbc, false, FS_GC_NODE_IO, NULL)) + &wbc, false, FS_GC_NODE_IO, NULL)) { + err = -EAGAIN; unlock_page(node_page); + } goto release_page; } else { /* set page dirty and write it */ @@ -1616,6 +1622,7 @@ out_page: unlock_page(node_page); release_page: f2fs_put_page(node_page, 0); + return err; } static int f2fs_write_node_page(struct page *page, From 329676473320d309bb267267645b1717ef4bfa4e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 3 Oct 2018 22:32:44 +0800 Subject: [PATCH 054/189] f2fs: fix to recover cold bit of inode block during POR Testcase to reproduce this bug: 1. mkfs.f2fs /dev/sdd 2. mount -t f2fs /dev/sdd /mnt/f2fs 3. touch /mnt/f2fs/file 4. sync 5. chattr +A /mnt/f2fs/file 6. xfs_io -f /mnt/f2fs/file -c "fsync" 7. godown /mnt/f2fs 8. umount /mnt/f2fs 9. mount -t f2fs /dev/sdd /mnt/f2fs 10. chattr -A /mnt/f2fs/file 11. xfs_io -f /mnt/f2fs/file -c "fsync" 12. umount /mnt/f2fs 13. mount -t f2fs /dev/sdd /mnt/f2fs 14. lsattr /mnt/f2fs/file -----------------N- /mnt/f2fs/file But actually, we expect the corrct result is: -------A---------N- /mnt/f2fs/file The reason is in step 9) we missed to recover cold bit flag in inode block, so later, in fsync, we will skip write inode block due to below condition check, result in lossing data in another SPOR. f2fs_fsync_node_pages() if (!IS_DNODE(page) || !is_cold_node(page)) continue; Note that, I guess that some non-dir inode has already lost cold bit during POR, so in order to reenable recovery for those inode, let's try to recover cold bit in f2fs_iget() to save more fsynced data. Fixes: c56675750d7c ("f2fs: remove unneeded set_cold_node()") Cc: 4.17+ Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 6 ++++++ fs/f2fs/node.c | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index ab845b558dd5..4408feee9488 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -365,6 +365,12 @@ static int do_read_inode(struct inode *inode) if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode)) __recover_inline_status(inode, node_page); + /* try to recover cold bit for non-dir inode */ + if (!S_ISDIR(inode->i_mode) && !is_cold_node(node_page)) { + set_cold_node(node_page, false); + set_page_dirty(node_page); + } + /* get rdev by using inline_info */ __get_inode_rdev(inode, ri); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b76dd5a06c29..adb6f8ee686c 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2552,7 +2552,7 @@ retry: if (!PageUptodate(ipage)) SetPageUptodate(ipage); fill_node_footer(ipage, ino, ino, 0, true); - set_cold_node(page, false); + set_cold_node(ipage, false); src = F2FS_INODE(page); dst = F2FS_INODE(ipage); From 9f2917f2bb5d8a77e79a356128b2e65fb98b5e18 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 4 Oct 2018 11:15:18 +0800 Subject: [PATCH 055/189] f2fs: shrink sbi->sb_lock coverage in set_file_temperature() file_set_{cold,hot} doesn't need holding sbi->sb_lock, so moving them out of the lock. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index b2cc6c315f62..7b522c85a1a3 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -182,16 +182,19 @@ static inline void set_file_temperature(struct f2fs_sb_info *sbi, struct inode * hot_count = sbi->raw_super->hot_ext_count; for (i = 0; i < cold_count + hot_count; i++) { - if (!is_extension_exist(name, extlist[i])) - continue; - if (i < cold_count) - file_set_cold(inode); - else - file_set_hot(inode); - break; + if (is_extension_exist(name, extlist[i])) + break; } up_read(&sbi->sb_lock); + + if (i == cold_count + hot_count) + return; + + if (i < cold_count) + file_set_cold(inode); + else + file_set_hot(inode); } int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name, From 3dd704fff9483e9aa682ebd6487928ee9c0cf76c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 4 Oct 2018 11:15:19 +0800 Subject: [PATCH 056/189] f2fs: remove unused sbi->trigger_ssr_threshold Commit a2a12b679f36 ("f2fs: export SSR allocation threshold") introduced two threshold .min_ssr_sections and .trigger_ssr_threshold, but only .min_ssr_sections is used, so just remove redundant one for cleanup. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index eb83794e9b38..2a5271717dc7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1285,7 +1285,6 @@ struct f2fs_sb_info { unsigned int total_valid_node_count; /* valid node block count */ loff_t max_file_blocks; /* max block index of file */ int dir_level; /* directory level */ - unsigned int trigger_ssr_threshold; /* threshold to trigger ssr */ int readdir_ra; /* readahead inode in readdir */ block_t user_block_count; /* # of user blocks */ From 2dc22029a7e51dade5cddc06acc29bcec60263d2 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 4 Oct 2018 11:15:20 +0800 Subject: [PATCH 057/189] f2fs: remove unneeded disable_nat_bits() Commit 7735730d39d7 ("f2fs: fix to propagate error from __get_meta_page()") added disable_nat_bits() in error path of __get_nat_bitmaps(), but it's unneeded, beause we will fail mount, we won't have chance to change nid usage status w/o nat full/empty bitmaps. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index adb6f8ee686c..08edc0930831 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2882,10 +2882,8 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) struct page *page; page = f2fs_get_meta_page(sbi, nat_bits_addr++); - if (IS_ERR(page)) { - disable_nat_bits(sbi, true); + if (IS_ERR(page)) return PTR_ERR(page); - } memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS), page_address(page), F2FS_BLKSIZE); From 4aa5ef7fb109be395e9ebe06804d3d77c4bc8b68 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Fri, 5 Oct 2018 10:47:39 +0530 Subject: [PATCH 058/189] f2fs: do not update REQ_TIME in case of error conditions The REQ_TIME should be updated only in case of success cases as followed at all other places in the file system. Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 2 +- fs/f2fs/file.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index a3302e2888dc..317820239111 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -655,9 +655,9 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) f2fs_put_page(page, 1); clear_inode_flag(inode, FI_NEW_INODE); + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); fail: up_write(&F2FS_I(inode)->i_sem); - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); return err; } diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 834f87e75dff..040c9667347e 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -109,6 +109,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, SetPageUptodate(page); f2fs_update_iostat(sbi, APP_MAPPED_IO, F2FS_BLKSIZE); + f2fs_update_time(sbi, REQ_TIME); trace_f2fs_vm_page_mkwrite(page, DATA); out_sem: @@ -117,7 +118,6 @@ out_sem: f2fs_balance_fs(sbi, dn.node_changed); sb_end_pagefault(inode->i_sb); - f2fs_update_time(sbi, REQ_TIME); err: return block_page_mkwrite_return(err); } From fe2b3bc0fc9f37f6e672cce4d969c8b5020f4380 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Fri, 5 Oct 2018 10:47:40 +0530 Subject: [PATCH 059/189] f2fs: update REQ_TIME in f2fs_cross_rename() Update REQ_TIME in the missing path - f2fs_cross_rename(). Signed-off-by: Sahitya Tummala [Jaegeuk Kim: add it in f2fs_rename()] Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 7b522c85a1a3..2d2924696174 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -1001,6 +1001,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) f2fs_sync_fs(sbi->sb, 1); + + f2fs_update_time(sbi, REQ_TIME); return 0; put_out_dir: @@ -1158,6 +1160,8 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) f2fs_sync_fs(sbi->sb, 1); + + f2fs_update_time(sbi, REQ_TIME); return 0; out_new_dir: if (new_dir_entry) { From 05d4dcf63d564bf54f3bb2f310c1eff31ba5de02 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 2 Oct 2018 17:20:58 -0700 Subject: [PATCH 060/189] f2fs: allow to mount, if quota is failed Since we can use the filesystem without quotas till next boot. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 41e0609e7e30..53d549f0145a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3197,11 +3197,9 @@ try_onemore: /* Enable quota usage during mount */ if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb)) { err = f2fs_enable_quotas(sb); - if (err) { + if (err) f2fs_msg(sb, KERN_ERR, "Cannot turn on quotas: error %d", err); - goto free_sysfs; - } } #endif /* if there are nt orphan nodes free them */ @@ -3302,9 +3300,6 @@ free_meta: * falls into an infinite loop in f2fs_sync_meta_pages(). */ truncate_inode_pages_final(META_MAPPING(sbi)); -#ifdef CONFIG_QUOTA -free_sysfs: -#endif f2fs_unregister_sysfs(sbi); free_root_inode: dput(sb->s_root); From c4b87d0c1ca217e4f925034b213e57995e2bd986 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 16 Oct 2018 08:34:50 -0600 Subject: [PATCH 061/189] f2fs: remove request_list check in is_idle() This doesn't work on stacked devices, and it doesn't work on blk-mq devices. The request_list is only used on legacy, which we don't have much of anymore, and soon won't have any of. Kill the check. Cc: Jaegeuk Kim Cc: linux-f2fs-devel@lists.sourceforge.net Signed-off-by: Jens Axboe Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2a5271717dc7..90c99b7013bd 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1459,13 +1459,6 @@ static inline unsigned int f2fs_time_to_wait(struct f2fs_sb_info *sbi, static inline bool is_idle(struct f2fs_sb_info *sbi, int type) { - struct block_device *bdev = sbi->sb->s_bdev; - struct request_queue *q = bdev_get_queue(bdev); - struct request_list *rl = &q->root_rl; - - if (rl->count[BLK_RW_SYNC] || rl->count[BLK_RW_ASYNC]) - return false; - return f2fs_time_over(sbi, type); } From fb363c5db4200af3eaaa1665aa2290f8a4121b24 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 22 Oct 2018 09:12:51 +0800 Subject: [PATCH 062/189] f2fs: fix to account IO correctly Below race can cause reversed reference on dirty count, fix it by relocating __submit_bio() and inc_page_count(). Thread A Thread B - f2fs_inplace_write_data - f2fs_submit_page_bio - __submit_bio - f2fs_write_end_io - dec_page_count - inc_page_count Cc: Fixes: d1b3e72d5490 ("f2fs: submit bio of in-place-update pages") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index cdfdd6919dcc..49cfa4859291 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -459,10 +459,10 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) } bio_set_op_attrs(bio, fio->op, fio->op_flags); - __submit_bio(fio->sbi, bio, fio->type); - if (!is_read_io(fio->op)) inc_page_count(fio->sbi, WB_DATA_TYPE(fio->page)); + + __submit_bio(fio->sbi, bio, fio->type); return 0; } From 9204be8a482b55f71abce48baddb24c8a8f0e424 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 22 Oct 2018 23:24:28 +0800 Subject: [PATCH 063/189] f2fs: fix to account IO correctly for cgroup writeback Now, we have supported cgroup writeback, it depends on correctly IO account of specified filesystem. But in commit d1b3e72d5490 ("f2fs: submit bio of in-place-update pages"), we split write paths from f2fs_submit_page_mbio() to two: - f2fs_submit_page_bio() for IPU path - f2fs_submit_page_bio() for OPU path But still we account write IO only in f2fs_submit_page_mbio(), result in incorrect IO account, fix it by adding missing IO account in IPU path. Fixes: d1b3e72d5490 ("f2fs: submit bio of in-place-update pages") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 49cfa4859291..326b6955bd2a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -457,6 +457,10 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) bio_put(bio); return -EFAULT; } + + if (fio->io_wbc && !is_read_io(fio->op)) + wbc_account_io(fio->io_wbc, page, PAGE_SIZE); + bio_set_op_attrs(bio, fio->op, fio->op_flags); if (!is_read_io(fio->op)) From 003e915c0196b0f6a913947c3f9cf51ef54a26f7 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 16 Oct 2018 10:20:53 -0700 Subject: [PATCH 064/189] f2fs: account read IOs and use IO counts for is_idle This patch adds issued read IO counts which is under block layer. Chao modified a bit, since: Below race can cause reversed reference on F2FS_RD_DATA, there is the same issue in f2fs_submit_page_bio(), fix them by relocate __submit_bio() and inc_page_count. Thread A Thread B - f2fs_write_begin - f2fs_submit_page_read - __submit_bio - f2fs_read_end_io - __read_end_io - dec_page_count(, F2FS_RD_DATA) - inc_page_count(, F2FS_RD_DATA) Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 24 ++++++++++++++++++++++-- fs/f2fs/debug.c | 7 ++++++- fs/f2fs/f2fs.h | 22 ++++++++++++++++------ 3 files changed, 44 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 326b6955bd2a..a762d4e5d2c6 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -51,6 +51,23 @@ static bool __is_cp_guaranteed(struct page *page) return false; } +static enum count_type __read_io_type(struct page *page) +{ + struct address_space *mapping = page->mapping; + + if (mapping) { + struct inode *inode = mapping->host; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + if (inode->i_ino == F2FS_META_INO(sbi)) + return F2FS_RD_META; + + if (inode->i_ino == F2FS_NODE_INO(sbi)) + return F2FS_RD_NODE; + } + return F2FS_RD_DATA; +} + /* postprocessing steps for read bios */ enum bio_post_read_step { STEP_INITIAL = 0, @@ -81,6 +98,7 @@ static void __read_end_io(struct bio *bio) } else { SetPageUptodate(page); } + dec_page_count(F2FS_P_SB(page), __read_io_type(page)); unlock_page(page); } if (bio->bi_private) @@ -463,8 +481,8 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) bio_set_op_attrs(bio, fio->op, fio->op_flags); - if (!is_read_io(fio->op)) - inc_page_count(fio->sbi, WB_DATA_TYPE(fio->page)); + inc_page_count(fio->sbi, is_read_io(fio->op) ? + __read_io_type(page): WB_DATA_TYPE(fio->page)); __submit_bio(fio->sbi, bio, fio->type); return 0; @@ -595,6 +613,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, return -EFAULT; } ClearPageError(page); + inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA); __submit_bio(F2FS_I_SB(inode), bio, DATA); return 0; } @@ -1582,6 +1601,7 @@ submit_and_realloc: if (bio_add_page(bio, page, blocksize, 0) < blocksize) goto submit_and_realloc; + inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA); ClearPageError(page); last_block_in_bio = block_nr; goto next_page; diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 026e10f30889..139b4d5c83d5 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -55,6 +55,9 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->max_vw_cnt = atomic_read(&sbi->max_vw_cnt); si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA); si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA); + si->nr_rd_data = get_pages(sbi, F2FS_RD_DATA); + si->nr_rd_node = get_pages(sbi, F2FS_RD_NODE); + si->nr_rd_meta = get_pages(sbi, F2FS_RD_META); if (SM_I(sbi) && SM_I(sbi)->fcc_info) { si->nr_flushed = atomic_read(&SM_I(sbi)->fcc_info->issued_flush); @@ -371,7 +374,9 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", si->ext_tree, si->zombie_tree, si->ext_node); seq_puts(s, "\nBalancing F2FS Async:\n"); - seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), " + seq_printf(s, " - IO_R (Data: %4d, Node: %4d, Meta: %4d\n", + si->nr_rd_data, si->nr_rd_node, si->nr_rd_meta); + seq_printf(s, " - IO_W (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), " "Discard: (%4d %4d)) cmd: %4d undiscard:%4u\n", si->nr_wb_cp_data, si->nr_wb_data, si->nr_flushing, si->nr_flushed, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 90c99b7013bd..07fa071a66f5 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1015,6 +1015,9 @@ enum count_type { F2FS_DIRTY_IMETA, F2FS_WB_CP_DATA, F2FS_WB_DATA, + F2FS_RD_DATA, + F2FS_RD_NODE, + F2FS_RD_META, NR_COUNT_TYPE, }; @@ -1457,11 +1460,6 @@ static inline unsigned int f2fs_time_to_wait(struct f2fs_sb_info *sbi, return wait_ms; } -static inline bool is_idle(struct f2fs_sb_info *sbi, int type) -{ - return f2fs_time_over(sbi, type); -} - /* * Inline functions */ @@ -1852,7 +1850,9 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) atomic_inc(&sbi->nr_pages[count_type]); if (count_type == F2FS_DIRTY_DATA || count_type == F2FS_INMEM_PAGES || - count_type == F2FS_WB_CP_DATA || count_type == F2FS_WB_DATA) + count_type == F2FS_WB_CP_DATA || count_type == F2FS_WB_DATA || + count_type == F2FS_RD_DATA || count_type == F2FS_RD_NODE || + count_type == F2FS_RD_META) return; set_sbi_flag(sbi, SBI_IS_DIRTY); @@ -2189,6 +2189,15 @@ static inline struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, return bio_alloc(GFP_KERNEL, npages); } +static inline bool is_idle(struct f2fs_sb_info *sbi, int type) +{ + if (get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_RD_NODE) || + get_pages(sbi, F2FS_RD_META) || get_pages(sbi, F2FS_WB_DATA) || + get_pages(sbi, F2FS_WB_CP_DATA)) + return false; + return f2fs_time_over(sbi, type); +} + static inline void f2fs_radix_tree_insert(struct radix_tree_root *root, unsigned long index, void *item) { @@ -3199,6 +3208,7 @@ struct f2fs_stat_info { int free_nids, avail_nids, alloc_nids; int total_count, utilization; int bg_gc, nr_wb_cp_data, nr_wb_data; + int nr_rd_data, nr_rd_node, nr_rd_meta; unsigned int io_skip_bggc, other_skip_bggc; int nr_flushing, nr_flushed, flush_list_empty; int nr_discarding, nr_discarded; From 0246fe9d2e9ad87b5ec7186a09566b8ce96b19cf Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 16 Oct 2018 19:30:13 -0700 Subject: [PATCH 065/189] Revert "f2fs: fix to clear PG_checked flag in set_page_dirty()" This reverts commit 66110abc4c931f879d70e83e1281f891699364bf. If we clear the cold data flag out of the writeback flow, we can miscount -1 by end_io, which incurs a deadlock caused by all I/Os being blocked during heavy GC. Balancing F2FS Async: - IO (CP: 1, Data: -1, Flush: ( 0 0 1), Discard: ( ... GC thread: IRQ - move_data_page() - set_page_dirty() - clear_cold_data() - f2fs_write_end_io() - type = WB_DATA_TYPE(page); here, we get wrong type - dec_page_count(sbi, type); - f2fs_wait_on_page_writeback() Cc: Reported-and-Tested-by: Park Ju Hyung Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a762d4e5d2c6..dacfc4aa7e38 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2650,10 +2650,6 @@ static int f2fs_set_data_page_dirty(struct page *page) if (!PageUptodate(page)) SetPageUptodate(page); - /* don't remain PG_checked flag which was set during GC */ - if (is_cold_data(page)) - clear_cold_data(page); - if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) { if (!IS_ATOMIC_WRITTEN_PAGE(page)) { f2fs_register_inmem_page(inode, page); From fbfc2e102ca7fca46eecc8a73d2b7d2a5d81c9c7 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 27 Jul 2018 18:15:16 +0800 Subject: [PATCH 066/189] f2fs: fix to spread clear_cold_data() We need to drop PG_checked flag on page as well when we clear PG_uptodate flag, in order to avoid treating the page as GCing one later. Signed-off-by: Weichao Guo Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 8 +++++++- fs/f2fs/dir.c | 1 + fs/f2fs/segment.c | 4 +++- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index dacfc4aa7e38..231876a9d647 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1812,6 +1812,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) /* This page is already truncated */ if (fio->old_blkaddr == NULL_ADDR) { ClearPageUptodate(page); + clear_cold_data(page); goto out_writepage; } got_it: @@ -1987,8 +1988,10 @@ done: out: inode_dec_dirty_pages(inode); - if (err) + if (err) { ClearPageUptodate(page); + clear_cold_data(page); + } if (wbc->for_reclaim) { f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA); @@ -2617,6 +2620,8 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset, } } + clear_cold_data(page); + /* This is atomic written page, keep Private */ if (IS_ATOMIC_WRITTEN_PAGE(page)) return f2fs_drop_inmem_page(inode, page); @@ -2635,6 +2640,7 @@ int f2fs_release_page(struct page *page, gfp_t wait) if (IS_ATOMIC_WRITTEN_PAGE(page)) return 0; + clear_cold_data(page); set_page_private(page, 0); ClearPagePrivate(page); return 1; diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 317820239111..eedc07b46a52 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -730,6 +730,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, clear_page_dirty_for_io(page); ClearPagePrivate(page); ClearPageUptodate(page); + clear_cold_data(page); inode_dec_dirty_pages(dir); f2fs_remove_dirty_inode(dir); } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1b2ebd9b986d..e4305fcb6265 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -266,8 +266,10 @@ retry: } next: /* we don't need to invalidate this in the sccessful status */ - if (drop || recover) + if (drop || recover) { ClearPageUptodate(page); + clear_cold_data(page); + } set_page_private(page, 0); ClearPagePrivate(page); f2fs_put_page(page, 1); From 6a23a85581000e723c45ff2541237adc47822702 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 7 Oct 2018 19:06:15 +0800 Subject: [PATCH 067/189] f2fs: spread f2fs_set_inode_flags() This patch changes codes as below: - use f2fs_set_inode_flags() to update i_flags atomically to avoid potential race. - synchronize F2FS_I(inode)->i_flags to inode->i_flags in f2fs_new_inode(). - use f2fs_set_inode_flags() to simply codes in f2fs_quota_{on,off}. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/namei.c | 2 ++ fs/f2fs/super.c | 5 ++--- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 07fa071a66f5..efe56091b07c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3506,7 +3506,7 @@ static inline void f2fs_set_encrypted_inode(struct inode *inode) { #ifdef CONFIG_F2FS_FS_ENCRYPTION file_set_encrypt(inode); - inode->i_flags |= S_ENCRYPTED; + f2fs_set_inode_flags(inode); #endif } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 2d2924696174..dd1748c5e7f0 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -122,6 +122,8 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) if (F2FS_I(inode)->i_flags & F2FS_PROJINHERIT_FL) set_inode_flag(inode, FI_PROJ_INHERIT); + f2fs_set_inode_flags(inode); + trace_f2fs_new_inode(inode, 0); return inode; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 53d549f0145a..4693e4c6d8b6 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1940,8 +1940,7 @@ static int f2fs_quota_on(struct super_block *sb, int type, int format_id, inode_lock(inode); F2FS_I(inode)->i_flags |= F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL; - inode_set_flags(inode, S_NOATIME | S_IMMUTABLE, - S_NOATIME | S_IMMUTABLE); + f2fs_set_inode_flags(inode); inode_unlock(inode); f2fs_mark_inode_dirty_sync(inode, false); @@ -1966,7 +1965,7 @@ static int f2fs_quota_off(struct super_block *sb, int type) inode_lock(inode); F2FS_I(inode)->i_flags &= ~(F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL); - inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE); + f2fs_set_inode_flags(inode); inode_unlock(inode); f2fs_mark_inode_dirty_sync(inode, false); out_put: From 797d94b07ea05f1b58c8056ca80e0712591fee04 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 7 Oct 2018 03:03:38 +0800 Subject: [PATCH 068/189] f2fs: fix to recover inode->i_flags of inode block during POR Testcase to reproduce this bug: 1. mkfs.f2fs /dev/sdd 2. mount -t f2fs /dev/sdd /mnt/f2fs 3. touch /mnt/f2fs/file 4. sync 5. chattr +a /mnt/f2fs/file 6. xfs_io -a /mnt/f2fs/file -c "fsync" 7. godown /mnt/f2fs 8. umount /mnt/f2fs 9. mount -t f2fs /dev/sdd /mnt/f2fs 10. xfs_io /mnt/f2fs/file There is no error when opening this file w/o O_APPEND, but actually, we expect the correct result should be: /mnt/f2fs/file: Operation not permitted The root cause is, in recover_inode(), we recover inode->i_flags more than F2FS_I(inode)->i_flags, so fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index b069b7d7403d..7899a9e8a6e0 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -234,6 +234,7 @@ static void recover_inode(struct inode *inode, struct page *page) F2FS_I(inode)->i_advise = raw->i_advise; F2FS_I(inode)->i_flags = le32_to_cpu(raw->i_flags); + f2fs_set_inode_flags(inode); F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN] = le16_to_cpu(raw->i_gc_failures); From f8a7408381a446f046f7e4f01f247aeb39511050 Mon Sep 17 00:00:00 2001 From: Sahitya Tummala Date: Wed, 10 Oct 2018 10:56:22 +0530 Subject: [PATCH 069/189] f2fs: fix data corruption issue with hardware encryption Direct IO can be used in case of hardware encryption. The following scenario results into data corruption issue in this path - Thread A - Thread B- -> write file#1 in direct IO -> GC gets kicked in -> GC submitted bio on meta mapping for file#1, but pending completion -> write file#1 again with new data in direct IO -> GC bio gets completed now -> GC writes old data to the new location and thus file#1 is corrupted. Fix this by submitting and waiting for pending io on meta mapping for direct IO case in f2fs_map_blocks(). Signed-off-by: Sahitya Tummala Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 11 +++++++++++ fs/f2fs/f2fs.h | 2 ++ fs/f2fs/segment.c | 9 +++++++++ 3 files changed, 22 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 231876a9d647..47fdb3aa4b21 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1050,6 +1050,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, map->m_flags = F2FS_MAP_MAPPED; if (map->m_next_extent) *map->m_next_extent = pgofs + map->m_len; + + /* for hardware encryption, but to avoid potential issue in future */ + if (flag == F2FS_GET_BLOCK_DIO) + f2fs_wait_on_block_writeback_range(inode, + map->m_pblk, map->m_len); goto out; } @@ -1208,6 +1213,12 @@ skip: goto next_dnode; sync_out: + + /* for hardware encryption, but to avoid potential issue in future */ + if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) + f2fs_wait_on_block_writeback_range(inode, + map->m_pblk, map->m_len); + if (flag == F2FS_GET_BLOCK_PRECACHE) { if (map->m_flags & F2FS_MAP_MAPPED) { unsigned int ofs = start_pgofs - map->m_lblk; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index efe56091b07c..b1c2d097073b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3073,6 +3073,8 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, void f2fs_wait_on_page_writeback(struct page *page, enum page_type type, bool ordered); void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr); +void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr, + block_t len); void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk); void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk); int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type, diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index e4305fcb6265..fb4be4629add 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3380,6 +3380,15 @@ void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr) } } +void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr, + block_t len) +{ + block_t i; + + for (i = 0; i < len; i++) + f2fs_wait_on_block_writeback(inode, blkaddr + i); +} + static int read_compacted_summaries(struct f2fs_sb_info *sbi) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); From ba406b630883486ac915f6d2261873dacd648f7d Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Fri, 12 Oct 2018 18:49:26 +0800 Subject: [PATCH 070/189] f2fs: cleanup dirty pages if recover failed During recover, we will try to create new dentries for inodes with dentry_mark. But if the parent is missing (e.g. killed by fsck), recover will break. But those recovered dirty pages are not cleanup. This will hit f2fs_bug_on: [ 53.519566] F2FS-fs (loop0): Found nat_bits in checkpoint [ 53.539354] F2FS-fs (loop0): recover_inode: ino = 5, name = file, inline = 3 [ 53.539402] F2FS-fs (loop0): recover_dentry: ino = 5, name = file, dir = 0, err = -2 [ 53.545760] F2FS-fs (loop0): Cannot recover all fsync data errno=-2 [ 53.546105] F2FS-fs (loop0): access invalid blkaddr:4294967295 [ 53.546171] WARNING: CPU: 1 PID: 1798 at fs/f2fs/checkpoint.c:163 f2fs_is_valid_blkaddr+0x26c/0x320 [ 53.546174] Modules linked in: [ 53.546183] CPU: 1 PID: 1798 Comm: mount Not tainted 4.19.0-rc2+ #1 [ 53.546186] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 53.546191] RIP: 0010:f2fs_is_valid_blkaddr+0x26c/0x320 [ 53.546195] Code: 85 bb 00 00 00 48 89 df 88 44 24 07 e8 ad a8 db ff 48 8b 3b 44 89 e1 48 c7 c2 40 03 72 a9 48 c7 c6 e0 01 72 a9 e8 84 3c ff ff <0f> 0b 0f b6 44 24 07 e9 8a 00 00 00 48 8d bf 38 01 00 00 e8 7c a8 [ 53.546201] RSP: 0018:ffff88006c067768 EFLAGS: 00010282 [ 53.546208] RAX: 0000000000000000 RBX: ffff880068844200 RCX: ffffffffa83e1a33 [ 53.546211] RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff88006d51e590 [ 53.546215] RBP: 0000000000000005 R08: ffffed000daa3cb3 R09: ffffed000daa3cb3 [ 53.546218] R10: 0000000000000001 R11: ffffed000daa3cb2 R12: 00000000ffffffff [ 53.546221] R13: ffff88006a1f8000 R14: 0000000000000200 R15: 0000000000000009 [ 53.546226] FS: 00007fb2f3646840(0000) GS:ffff88006d500000(0000) knlGS:0000000000000000 [ 53.546229] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 53.546234] CR2: 00007f0fd77f0008 CR3: 00000000687e6002 CR4: 00000000000206e0 [ 53.546237] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 53.546240] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 53.546242] Call Trace: [ 53.546248] f2fs_submit_page_bio+0x95/0x740 [ 53.546253] read_node_page+0x161/0x1e0 [ 53.546271] ? truncate_node+0x650/0x650 [ 53.546283] ? add_to_page_cache_lru+0x12c/0x170 [ 53.546288] ? pagecache_get_page+0x262/0x2d0 [ 53.546292] __get_node_page+0x200/0x660 [ 53.546302] f2fs_update_inode_page+0x4a/0x160 [ 53.546306] f2fs_write_inode+0x86/0xb0 [ 53.546317] __writeback_single_inode+0x49c/0x620 [ 53.546322] writeback_single_inode+0xe4/0x1e0 [ 53.546326] sync_inode_metadata+0x93/0xd0 [ 53.546330] ? sync_inode+0x10/0x10 [ 53.546342] ? do_raw_spin_unlock+0xed/0x100 [ 53.546347] f2fs_sync_inode_meta+0xe0/0x130 [ 53.546351] f2fs_fill_super+0x287d/0x2d10 [ 53.546367] ? vsnprintf+0x742/0x7a0 [ 53.546372] ? f2fs_commit_super+0x180/0x180 [ 53.546379] ? up_write+0x20/0x40 [ 53.546385] ? set_blocksize+0x5f/0x140 [ 53.546391] ? f2fs_commit_super+0x180/0x180 [ 53.546402] mount_bdev+0x181/0x200 [ 53.546406] mount_fs+0x94/0x180 [ 53.546411] vfs_kern_mount+0x6c/0x1e0 [ 53.546415] do_mount+0xe5e/0x1510 [ 53.546420] ? fs_reclaim_release+0x9/0x30 [ 53.546424] ? copy_mount_string+0x20/0x20 [ 53.546428] ? fs_reclaim_acquire+0xd/0x30 [ 53.546435] ? __might_sleep+0x2c/0xc0 [ 53.546440] ? ___might_sleep+0x53/0x170 [ 53.546453] ? __might_fault+0x4c/0x60 [ 53.546468] ? _copy_from_user+0x95/0xa0 [ 53.546474] ? memdup_user+0x39/0x60 [ 53.546478] ksys_mount+0x88/0xb0 [ 53.546482] __x64_sys_mount+0x5d/0x70 [ 53.546495] do_syscall_64+0x65/0x130 [ 53.546503] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 53.547639] ---[ end trace b804d1ea2fec893e ]--- So if recover fails, we need to drop all recovered data. Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 32 +++++++++++++++++++++----------- fs/f2fs/super.c | 14 +++++++++++++- 2 files changed, 34 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 7899a9e8a6e0..b7173ae30c75 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -96,8 +96,12 @@ err_out: return ERR_PTR(err); } -static void del_fsync_inode(struct fsync_inode_entry *entry) +static void del_fsync_inode(struct fsync_inode_entry *entry, int drop) { + if (drop) { + /* inode should not be recovered, drop it */ + f2fs_inode_synced(entry->inode); + } iput(entry->inode); list_del(&entry->list); kmem_cache_free(fsync_entry_slab, entry); @@ -338,12 +342,12 @@ next: return err; } -static void destroy_fsync_dnodes(struct list_head *head) +static void destroy_fsync_dnodes(struct list_head *head, int drop) { struct fsync_inode_entry *entry, *tmp; list_for_each_entry_safe(entry, tmp, head, list) - del_fsync_inode(entry); + del_fsync_inode(entry, drop); } static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, @@ -580,7 +584,7 @@ out: } static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, - struct list_head *dir_list) + struct list_head *tmp_inode_list, struct list_head *dir_list) { struct curseg_info *curseg; struct page *page = NULL; @@ -634,7 +638,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, } if (entry->blkaddr == blkaddr) - del_fsync_inode(entry); + list_move_tail(&entry->list, tmp_inode_list); next: /* check next segment */ blkaddr = next_blkaddr_of_node(page); @@ -647,7 +651,7 @@ next: int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) { - struct list_head inode_list; + struct list_head inode_list, tmp_inode_list; struct list_head dir_list; int err; int ret = 0; @@ -678,6 +682,7 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) } INIT_LIST_HEAD(&inode_list); + INIT_LIST_HEAD(&tmp_inode_list); INIT_LIST_HEAD(&dir_list); /* prevent checkpoint */ @@ -696,11 +701,16 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) need_writecp = true; /* step #2: recover data */ - err = recover_data(sbi, &inode_list, &dir_list); + err = recover_data(sbi, &inode_list, &tmp_inode_list, &dir_list); if (!err) f2fs_bug_on(sbi, !list_empty(&inode_list)); + else { + /* restore s_flags to let iput() trash data */ + sbi->sb->s_flags = s_flags; + } skip: - destroy_fsync_dnodes(&inode_list); + destroy_fsync_dnodes(&inode_list, err); + destroy_fsync_dnodes(&tmp_inode_list, err); /* truncate meta pages to be used by the recovery */ truncate_inode_pages_range(META_MAPPING(sbi), @@ -709,13 +719,13 @@ skip: if (err) { truncate_inode_pages_final(NODE_MAPPING(sbi)); truncate_inode_pages_final(META_MAPPING(sbi)); + } else { + clear_sbi_flag(sbi, SBI_POR_DOING); } - - clear_sbi_flag(sbi, SBI_POR_DOING); mutex_unlock(&sbi->cp_mutex); /* let's drop all the directory inodes for clean checkpoint */ - destroy_fsync_dnodes(&dir_list); + destroy_fsync_dnodes(&dir_list, err); if (need_writecp) { set_sbi_flag(sbi, SBI_IS_RECOVERED); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 4693e4c6d8b6..64d2bc35225e 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1992,6 +1992,18 @@ void f2fs_quota_off_umount(struct super_block *sb) } } +static void f2fs_truncate_quota_inode_pages(struct super_block *sb) +{ + struct quota_info *dqopt = sb_dqopt(sb); + int type; + + for (type = 0; type < MAXQUOTAS; type++) { + if (!dqopt->files[type]) + continue; + f2fs_inode_synced(dqopt->files[type]); + } +} + #if 0 static int f2fs_get_projid(struct inode *inode, kprojid_t *projid) { @@ -3288,10 +3300,10 @@ skip_recovery: free_meta: #ifdef CONFIG_QUOTA + f2fs_truncate_quota_inode_pages(sb); if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb)) f2fs_quota_off_umount(sbi->sb); #endif - f2fs_sync_inode_meta(sbi); /* * Some dirty meta pages can be produced by f2fs_recover_orphan_inodes() * failed by EIO. Then, iput(node_inode) can trigger balance_fs_bg() From 0eea3276eebca7f423de34093d04a2e3fc541212 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 20 Sep 2018 20:05:00 +0800 Subject: [PATCH 071/189] f2fs: guarantee journalled quota data by checkpoint For journalled quota mode, let checkpoint to flush dquot dirty data and quota file data to guarntee persistence of all quota sysfile in last checkpoint, by this way, we can avoid corrupting quota sysfile when encountering SPO. The implementation is as below: 1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is cached dquot metadata changes in quota subsystem, and later checkpoint should: a) flush dquot metadata into quota file. b) flush quota file to storage to keep file usage be consistent. 2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota operation failed due to -EIO or -ENOSPC, so later, a) checkpoint will skip syncing dquot metadata. b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a hint for fsck repairing. 3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota data updating is very heavy, it may cause hungtask in block_operation(). To avoid this, if our retry time exceed threshold, let's just skip flushing and retry in next checkpoint(). Signed-off-by: Weichao Guo Signed-off-by: Chao Yu [Jaegeuk Kim: avoid warnings and set fsck flag] Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 62 +++++++++++++++++++-- fs/f2fs/data.c | 16 ++++-- fs/f2fs/f2fs.h | 49 +++++++++++++--- fs/f2fs/file.c | 31 ++++++++--- fs/f2fs/inline.c | 4 +- fs/f2fs/inode.c | 11 +++- fs/f2fs/namei.c | 4 -- fs/f2fs/recovery.c | 43 +++++++++++++- fs/f2fs/super.c | 120 ++++++++++++++++++++++++++++++++++++---- include/linux/f2fs_fs.h | 1 + 10 files changed, 294 insertions(+), 47 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 598424c1c4b1..4ed2d2a0bf02 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1080,6 +1080,21 @@ static void __prepare_cp_block(struct f2fs_sb_info *sbi) ckpt->next_free_nid = cpu_to_le32(last_nid); } +static bool __need_flush_quota(struct f2fs_sb_info *sbi) +{ + if (!is_journalled_quota(sbi)) + return false; + if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) + return false; + if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) + return false; + if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH)) + return true; + if (get_pages(sbi, F2FS_DIRTY_QDATA)) + return true; + return false; +} + /* * Freeze all the FS-operations for checkpoint. */ @@ -1091,12 +1106,36 @@ static int block_operations(struct f2fs_sb_info *sbi) .for_reclaim = 0, }; struct blk_plug plug; - int err = 0; + int err = 0, cnt = 0; blk_start_plug(&plug); -retry_flush_dents: +retry_flush_quotas: + if (__need_flush_quota(sbi)) { + int locked; + + if (++cnt > DEFAULT_RETRY_QUOTA_FLUSH_COUNT) { + set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH); + f2fs_lock_all(sbi); + goto retry_flush_dents; + } + clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH); + + /* only failed during mount/umount/freeze/quotactl */ + locked = down_read_trylock(&sbi->sb->s_umount); + f2fs_quota_sync(sbi->sb, -1); + if (locked) + up_read(&sbi->sb->s_umount); + } + f2fs_lock_all(sbi); + if (__need_flush_quota(sbi)) { + f2fs_unlock_all(sbi); + cond_resched(); + goto retry_flush_quotas; + } + +retry_flush_dents: /* write all the dirty dentry pages */ if (get_pages(sbi, F2FS_DIRTY_DENTS)) { f2fs_unlock_all(sbi); @@ -1104,7 +1143,7 @@ retry_flush_dents: if (err) goto out; cond_resched(); - goto retry_flush_dents; + goto retry_flush_quotas; } /* @@ -1113,6 +1152,12 @@ retry_flush_dents: */ down_write(&sbi->node_change); + if (__need_flush_quota(sbi)) { + up_write(&sbi->node_change); + f2fs_unlock_all(sbi); + goto retry_flush_quotas; + } + if (get_pages(sbi, F2FS_DIRTY_IMETA)) { up_write(&sbi->node_change); f2fs_unlock_all(sbi); @@ -1120,7 +1165,7 @@ retry_flush_dents: if (err) goto out; cond_resched(); - goto retry_flush_dents; + goto retry_flush_quotas; } retry_flush_nodes: @@ -1216,6 +1261,14 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) else __clear_ckpt_flags(ckpt, CP_DISABLED_FLAG); + if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) + __set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG); + else + __clear_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG); + + if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) + __set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG); + /* set this flag to activate crc|cp_ver for recovery */ __set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG); __clear_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG); @@ -1423,6 +1476,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) clear_sbi_flag(sbi, SBI_IS_DIRTY); clear_sbi_flag(sbi, SBI_NEED_CP); + clear_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH); sbi->unusable_block_count = 0; __set_cp_next_pack(sbi); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 47fdb3aa4b21..ac25667e9999 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -45,7 +45,7 @@ static bool __is_cp_guaranteed(struct page *page) inode->i_ino == F2FS_NODE_INO(sbi) || S_ISDIR(inode->i_mode) || (S_ISREG(inode->i_mode) && - is_inode_flag_set(inode, FI_ATOMIC_FILE)) || + (f2fs_is_atomic_file(inode) || IS_NOQUOTA(inode))) || is_cold_data(page)) return true; return false; @@ -1762,6 +1762,8 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) return true; if (S_ISDIR(inode->i_mode)) return true; + if (IS_NOQUOTA(inode)) + return true; if (f2fs_is_atomic_file(inode)) return true; if (fio) { @@ -2012,7 +2014,7 @@ out: } unlock_page(page); - if (!S_ISDIR(inode->i_mode)) + if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode)) f2fs_balance_fs(sbi, need_balance_fs); if (unlikely(f2fs_cp_error(sbi))) { @@ -2203,6 +2205,8 @@ static inline bool __should_serialize_io(struct inode *inode, { if (!S_ISREG(inode->i_mode)) return false; + if (IS_NOQUOTA(inode)) + return false; if (wbc->sync_mode != WB_SYNC_ALL) return true; if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks) @@ -2232,7 +2236,8 @@ static int __f2fs_write_data_pages(struct address_space *mapping, if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto skip_write; - if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE && + if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) && + wbc->sync_mode == WB_SYNC_NONE && get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) && f2fs_available_free_memory(sbi, DIRTY_DENTS)) goto skip_write; @@ -2297,7 +2302,7 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to) down_write(&F2FS_I(inode)->i_mmap_sem); truncate_pagecache(inode, i_size); - f2fs_truncate_blocks(inode, i_size, true); + f2fs_truncate_blocks(inode, i_size, true, true); up_write(&F2FS_I(inode)->i_mmap_sem); up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); @@ -2436,7 +2441,8 @@ repeat: if (err) goto fail; - if (need_balance && has_not_enough_free_secs(sbi, 0, 0)) { + if (need_balance && !IS_NOQUOTA(inode) && + has_not_enough_free_secs(sbi, 0, 0)) { unlock_page(page); f2fs_balance_fs(sbi, true); lock_page(page); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b1c2d097073b..ac0c10990bbd 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -596,6 +596,9 @@ enum { #define DEFAULT_RETRY_IO_COUNT 8 /* maximum retry read IO count */ +/* maximum retry quota flush count */ +#define DEFAULT_RETRY_QUOTA_FLUSH_COUNT 8 + #define F2FS_LINK_MAX 0xffffffff /* maximum link count per file */ #define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */ @@ -1164,6 +1167,9 @@ enum { SBI_IS_SHUTDOWN, /* shutdown by ioctl */ SBI_IS_RECOVERED, /* recovered orphan/data */ SBI_CP_DISABLED, /* CP was disabled last mount */ + SBI_QUOTA_NEED_FLUSH, /* need to flush quota info in CP */ + SBI_QUOTA_SKIP_FLUSH, /* skip flushing quota in current CP */ + SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */ }; enum { @@ -1988,12 +1994,18 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, { block_t valid_block_count; unsigned int valid_node_count; - bool quota = inode && !is_inode; + int err; - if (quota) { - int ret = dquot_reserve_block(inode, 1); - if (ret) - return ret; + if (is_inode) { + if (inode) { + err = dquot_alloc_inode(inode); + if (err) + return err; + } + } else { + err = dquot_reserve_block(inode, 1); + if (err) + return err; } if (time_to_inject(sbi, FAULT_BLOCK)) { @@ -2037,8 +2049,12 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, return 0; enospc: - if (quota) + if (is_inode) { + if (inode) + dquot_free_inode(inode); + } else { dquot_release_reservation_block(inode, 1); + } return -ENOSPC; } @@ -2059,7 +2075,9 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, spin_unlock(&sbi->stat_lock); - if (!is_inode) + if (is_inode) + dquot_free_inode(inode); + else f2fs_i_blocks_write(inode, 1, false, true); } @@ -2867,7 +2885,8 @@ static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi, */ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); void f2fs_truncate_data_blocks(struct dnode_of_data *dn); -int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock); +int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock, + bool buf_write); int f2fs_truncate(struct inode *inode); int f2fs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); @@ -2955,6 +2974,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) int f2fs_inode_dirtied(struct inode *inode, bool sync); void f2fs_inode_synced(struct inode *inode); int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly); +int f2fs_quota_sync(struct super_block *sb, int type); void f2fs_quota_off_umount(struct super_block *sb); int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover); int f2fs_sync_fs(struct super_block *sb, int sync); @@ -3648,3 +3668,16 @@ extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate, #endif #endif + +static inline bool is_journalled_quota(struct f2fs_sb_info *sbi) +{ +#ifdef CONFIG_QUOTA + if (f2fs_sb_has_quota_ino(sbi->sb)) + return true; + if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] || + F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] || + F2FS_OPTION(sbi).s_qf_names[PRJQUOTA]) + return true; +#endif + return false; +} diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 040c9667347e..3c61f9619be1 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -589,7 +589,8 @@ truncate_out: return 0; } -int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock) +int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock, + bool buf_write) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; @@ -597,6 +598,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock) int count = 0, err = 0; struct page *ipage; bool truncate_page = false; + int flag = buf_write ? F2FS_GET_BLOCK_PRE_AIO : F2FS_GET_BLOCK_PRE_DIO; trace_f2fs_truncate_blocks_enter(inode, from); @@ -606,7 +608,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock) goto free_partial; if (lock) - f2fs_lock_op(sbi); + __do_map_lock(sbi, flag, true); ipage = f2fs_get_node_page(sbi, inode->i_ino); if (IS_ERR(ipage)) { @@ -644,7 +646,7 @@ free_next: err = f2fs_truncate_inode_blocks(inode, free_from); out: if (lock) - f2fs_unlock_op(sbi); + __do_map_lock(sbi, flag, false); free_partial: /* lastly zero out the first data page */ if (!err) @@ -679,7 +681,7 @@ int f2fs_truncate(struct inode *inode) return err; } - err = f2fs_truncate_blocks(inode, i_size_read(inode), true); + err = f2fs_truncate_blocks(inode, i_size_read(inode), true, false); if (err) return err; @@ -789,9 +791,24 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) !uid_eq(attr->ia_uid, inode->i_uid)) || (attr->ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) { + f2fs_lock_op(F2FS_I_SB(inode)); err = dquot_transfer(inode, attr); - if (err) + if (err) { + set_sbi_flag(F2FS_I_SB(inode), + SBI_QUOTA_NEED_REPAIR); + f2fs_unlock_op(F2FS_I_SB(inode)); return err; + } + /* + * update uid/gid under lock_op(), so that dquot and inode can + * be updated atomically. + */ + if (attr->ia_valid & ATTR_UID) + inode->i_uid = attr->ia_uid; + if (attr->ia_valid & ATTR_GID) + inode->i_gid = attr->ia_gid; + f2fs_mark_inode_dirty_sync(inode, true); + f2fs_unlock_op(F2FS_I_SB(inode)); } if (attr->ia_valid & ATTR_SIZE) { @@ -1249,7 +1266,7 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) new_size = i_size_read(inode) - len; truncate_pagecache(inode, new_size); - ret = f2fs_truncate_blocks(inode, new_size, true); + ret = f2fs_truncate_blocks(inode, new_size, true, false); up_write(&F2FS_I(inode)->i_mmap_sem); if (!ret) f2fs_i_size_write(inode, new_size); @@ -1434,7 +1451,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) f2fs_balance_fs(sbi, true); down_write(&F2FS_I(inode)->i_mmap_sem); - ret = f2fs_truncate_blocks(inode, i_size_read(inode), true); + ret = f2fs_truncate_blocks(inode, i_size_read(inode), true, false); up_write(&F2FS_I(inode)->i_mmap_sem); if (ret) return ret; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 8caaab0685c7..15269b971e59 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -298,7 +298,7 @@ process_inline: clear_inode_flag(inode, FI_INLINE_DATA); f2fs_put_page(ipage, 1); } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) { - if (f2fs_truncate_blocks(inode, 0, false)) + if (f2fs_truncate_blocks(inode, 0, false, false)) return false; goto process_inline; } @@ -470,7 +470,7 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry) return 0; punch_dentry_pages: truncate_inode_pages(&dir->i_data, 0); - f2fs_truncate_blocks(dir, 0, false); + f2fs_truncate_blocks(dir, 0, false, false); f2fs_remove_dirty_inode(dir); return err; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 4408feee9488..1b1f22faff22 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -654,7 +654,11 @@ void f2fs_evict_inode(struct inode *inode) if (inode->i_nlink || is_bad_inode(inode)) goto no_delete; - dquot_initialize(inode); + err = dquot_initialize(inode); + if (err) { + err = 0; + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + } f2fs_remove_ino_entry(sbi, inode->i_ino, APPEND_INO); f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO); @@ -686,9 +690,10 @@ retry: goto retry; } - if (err) + if (err) { f2fs_update_inode_page(inode); - dquot_free_inode(inode); + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + } sb_end_intwrite(inode->i_sb); no_delete: dquot_drop(inode); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index dd1748c5e7f0..3ae6f5ae597e 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -72,10 +72,6 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) if (err) goto fail_drop; - err = dquot_alloc_inode(inode); - if (err) - goto fail_drop; - set_inode_flag(inode, FI_NEW_INODE); /* If the directory encrypted, then we should encrypt the inode. */ diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index b7173ae30c75..336273a81fba 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -195,6 +195,33 @@ out: return err; } +static int recover_quota_data(struct inode *inode, struct page *page) +{ + struct f2fs_inode *raw = F2FS_INODE(page); + struct iattr attr; + uid_t i_uid = le32_to_cpu(raw->i_uid); + gid_t i_gid = le32_to_cpu(raw->i_gid); + int err; + + memset(&attr, 0, sizeof(attr)); + + attr.ia_uid = make_kuid(&init_user_ns, i_uid); + attr.ia_gid = make_kgid(&init_user_ns, i_gid); + + if (!uid_eq(attr.ia_uid, inode->i_uid)) + attr.ia_valid |= ATTR_UID; + if (!gid_eq(attr.ia_gid, inode->i_gid)) + attr.ia_valid |= ATTR_GID; + + if (!attr.ia_valid) + return 0; + + err = dquot_transfer(inode, &attr); + if (err) + set_sbi_flag(F2FS_I_SB(inode), SBI_QUOTA_NEED_REPAIR); + return err; +} + static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri) { if (ri->i_inline & F2FS_PIN_FILE) @@ -207,12 +234,18 @@ static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri) clear_inode_flag(inode, FI_DATA_EXIST); } -static void recover_inode(struct inode *inode, struct page *page) +static int recover_inode(struct inode *inode, struct page *page) { struct f2fs_inode *raw = F2FS_INODE(page); char *name; + int err; inode->i_mode = le16_to_cpu(raw->i_mode); + + err = recover_quota_data(inode, page); + if (err) + return err; + i_uid_write(inode, le32_to_cpu(raw->i_uid)); i_gid_write(inode, le32_to_cpu(raw->i_gid)); @@ -254,6 +287,7 @@ static void recover_inode(struct inode *inode, struct page *page) f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s, inline = %x", ino_of_node(page), name, raw->i_inline); + return 0; } static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, @@ -622,8 +656,11 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, * In this case, we can lose the latest inode(x). * So, call recover_inode for the inode update. */ - if (IS_INODE(page)) - recover_inode(entry->inode, page); + if (IS_INODE(page)) { + err = recover_inode(entry->inode, page); + if (err) + break; + } if (entry->last_dentry == blkaddr) { err = recover_dentry(entry->inode, page, dir_list); if (err) { diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 64d2bc35225e..c551d5336473 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1712,6 +1712,7 @@ repeat: congestion_wait(BLK_RW_ASYNC, HZ/50); goto repeat; } + set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); return PTR_ERR(page); } @@ -1723,6 +1724,7 @@ repeat: } if (unlikely(!PageUptodate(page))) { f2fs_put_page(page, 1); + set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); return -EIO; } @@ -1764,6 +1766,7 @@ retry: congestion_wait(BLK_RW_ASYNC, HZ/50); goto retry; } + set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); break; } @@ -1800,6 +1803,12 @@ static qsize_t *f2fs_get_reserved_space(struct inode *inode) static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type) { + if (is_set_ckpt_flags(sbi, CP_QUOTA_NEED_FSCK_FLAG)) { + f2fs_msg(sbi->sb, KERN_ERR, + "quota sysfile may be corrupted, skip loading it"); + return 0; + } + return dquot_quota_on_mount(sbi->sb, F2FS_OPTION(sbi).s_qf_names[type], F2FS_OPTION(sbi).s_jquota_fmt, type); } @@ -1870,7 +1879,14 @@ static int f2fs_enable_quotas(struct super_block *sb) test_opt(F2FS_SB(sb), PRJQUOTA), }; + if (is_set_ckpt_flags(F2FS_SB(sb), CP_QUOTA_NEED_FSCK_FLAG)) { + f2fs_msg(sb, KERN_ERR, + "quota file may be corrupted, skip loading it"); + return 0; + } + sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE; + for (type = 0; type < MAXQUOTAS; type++) { qf_inum = f2fs_qf_ino(sb, type); if (qf_inum) { @@ -1884,6 +1900,8 @@ static int f2fs_enable_quotas(struct super_block *sb) "fsck to fix.", type, err); for (type--; type >= 0; type--) dquot_quota_off(sb, type); + set_sbi_flag(F2FS_SB(sb), + SBI_QUOTA_NEED_REPAIR); return err; } } @@ -1891,35 +1909,51 @@ static int f2fs_enable_quotas(struct super_block *sb) return 0; } -static int f2fs_quota_sync(struct super_block *sb, int type) +int f2fs_quota_sync(struct super_block *sb, int type) { + struct f2fs_sb_info *sbi = F2FS_SB(sb); struct quota_info *dqopt = sb_dqopt(sb); int cnt; int ret; ret = dquot_writeback_dquots(sb, type); if (ret) - return ret; + goto out; /* * Now when everything is written we can discard the pagecache so * that userspace sees the changes. */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + struct address_space *mapping; + if (type != -1 && cnt != type) continue; if (!sb_has_quota_active(sb, cnt)) continue; - ret = filemap_write_and_wait(dqopt->files[cnt]->i_mapping); + mapping = dqopt->files[cnt]->i_mapping; + + ret = filemap_fdatawrite(mapping); if (ret) - return ret; + goto out; + + /* if we are using journalled quota */ + if (is_journalled_quota(sbi)) + continue; + + ret = filemap_fdatawait(mapping); + if (ret) + set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); inode_lock(dqopt->files[cnt]); truncate_inode_pages(&dqopt->files[cnt]->i_data, 0); inode_unlock(dqopt->files[cnt]); } - return 0; +out: + if (ret) + set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); + return ret; } static int f2fs_quota_on(struct super_block *sb, int type, int format_id, @@ -1987,7 +2021,7 @@ void f2fs_quota_off_umount(struct super_block *sb) "Fail to turn off disk quota " "(type: %d, err: %d, ret:%d), Please " "run fsck to fix it.", type, err, ret); - set_sbi_flag(F2FS_SB(sb), SBI_NEED_FSCK); + set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); } } } @@ -2004,6 +2038,62 @@ static void f2fs_truncate_quota_inode_pages(struct super_block *sb) } } +static int f2fs_dquot_commit(struct dquot *dquot) +{ + int ret; + + ret = dquot_commit(dquot); + if (ret < 0) + set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR); + return ret; +} + +static int f2fs_dquot_acquire(struct dquot *dquot) +{ + int ret; + + ret = dquot_acquire(dquot); + if (ret < 0) + set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR); + + return ret; +} + +static int f2fs_dquot_release(struct dquot *dquot) +{ + int ret; + + ret = dquot_release(dquot); + if (ret < 0) + set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR); + return ret; +} + +static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot) +{ + struct super_block *sb = dquot->dq_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + int ret; + + ret = dquot_mark_dquot_dirty(dquot); + + /* if we are using journalled quota */ + if (is_journalled_quota(sbi)) + set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH); + + return ret; +} + +static int f2fs_dquot_commit_info(struct super_block *sb, int type) +{ + int ret; + + ret = dquot_commit_info(sb, type); + if (ret < 0) + set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); + return ret; +} + #if 0 static int f2fs_get_projid(struct inode *inode, kprojid_t *projid) { @@ -2014,11 +2104,11 @@ static int f2fs_get_projid(struct inode *inode, kprojid_t *projid) static const struct dquot_operations f2fs_quota_operations = { .get_reserved_space = f2fs_get_reserved_space, - .write_dquot = dquot_commit, - .acquire_dquot = dquot_acquire, - .release_dquot = dquot_release, - .mark_dirty = dquot_mark_dquot_dirty, - .write_info = dquot_commit_info, + .write_dquot = f2fs_dquot_commit, + .acquire_dquot = f2fs_dquot_acquire, + .release_dquot = f2fs_dquot_release, + .mark_dirty = f2fs_dquot_mark_dquot_dirty, + .write_info = f2fs_dquot_commit_info, .alloc_dquot = dquot_alloc, .destroy_dquot = dquot_destroy, #if 0 @@ -2037,6 +2127,11 @@ static const struct quotactl_ops f2fs_quotactl_ops = { .set_dqblk = dquot_set_dqblk, }; #else +int f2fs_quota_sync(struct super_block *sb, int type) +{ + return 0; +} + void f2fs_quota_off_umount(struct super_block *sb) { } @@ -3111,6 +3206,9 @@ try_onemore: goto free_meta_inode; } + if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_QUOTA_NEED_FSCK_FLAG)) + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + /* Initialize device list */ err = f2fs_scan_devices(sbi); if (err) { diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 6c2786db5154..162f83358abb 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -117,6 +117,7 @@ struct f2fs_super_block { * For checkpoint */ #define CP_DISABLED_FLAG 0x00001000 +#define CP_QUOTA_NEED_FSCK_FLAG 0x00000800 #define CP_LARGE_NAT_BITMAP_FLAG 0x00000400 #define CP_NOCRC_RECOVERY_FLAG 0x00000200 #define CP_TRIMMED_FLAG 0x00000100 From 1bdb20fcd45751bb7ec7b4399579ceb0d1fcc504 Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Fri, 6 Jul 2018 16:24:27 -0700 Subject: [PATCH 072/189] ANDROID: sdcardfs: Add option to drop unused dentries This adds the nocache mount option, which will cause sdcardfs to always drop dentries that are not in use, preventing cached entries from holding on to lower dentries, which could cause strange behavior when bypassing the sdcardfs layer and directly changing the lower fs. Change-Id: I70268584a20b989ae8cfdd278a2e4fa1605217fb Signed-off-by: Daniel Rosenberg --- fs/sdcardfs/dentry.c | 7 +++++++ fs/sdcardfs/main.c | 6 ++++++ fs/sdcardfs/sdcardfs.h | 1 + fs/sdcardfs/super.c | 2 ++ 4 files changed, 16 insertions(+) diff --git a/fs/sdcardfs/dentry.c b/fs/sdcardfs/dentry.c index 3795d2c26915..eb279de02ffb 100644 --- a/fs/sdcardfs/dentry.c +++ b/fs/sdcardfs/dentry.c @@ -123,6 +123,12 @@ out: return err; } +/* 1 = delete, 0 = cache */ +static int sdcardfs_d_delete(const struct dentry *d) +{ + return SDCARDFS_SB(d->d_sb)->options.nocache ? 1 : 0; +} + static void sdcardfs_d_release(struct dentry *dentry) { if (!dentry || !dentry->d_fsdata) @@ -182,6 +188,7 @@ static void sdcardfs_canonical_path(const struct path *path, const struct dentry_operations sdcardfs_ci_dops = { .d_revalidate = sdcardfs_d_revalidate, + .d_delete = sdcardfs_d_delete, .d_release = sdcardfs_d_release, .d_hash = sdcardfs_hash_ci, .d_compare = sdcardfs_cmp_ci, diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c index 27ec726e7a46..ba52af8644cc 100644 --- a/fs/sdcardfs/main.c +++ b/fs/sdcardfs/main.c @@ -34,6 +34,7 @@ enum { Opt_reserved_mb, Opt_gid_derivation, Opt_default_normal, + Opt_nocache, Opt_err, }; @@ -48,6 +49,7 @@ static const match_table_t sdcardfs_tokens = { {Opt_gid_derivation, "derive_gid"}, {Opt_default_normal, "default_normal"}, {Opt_reserved_mb, "reserved_mb=%u"}, + {Opt_nocache, "nocache"}, {Opt_err, NULL} }; @@ -71,6 +73,7 @@ static int parse_options(struct super_block *sb, char *options, int silent, /* by default, gid derivation is off */ opts->gid_derivation = false; opts->default_normal = false; + opts->nocache = false; *debug = 0; @@ -128,6 +131,9 @@ static int parse_options(struct super_block *sb, char *options, int silent, case Opt_default_normal: opts->default_normal = true; break; + case Opt_nocache: + opts->nocache = true; + break; /* unknown option */ default: if (!silent) diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h index 99227a07a8d6..57be4761d32b 100644 --- a/fs/sdcardfs/sdcardfs.h +++ b/fs/sdcardfs/sdcardfs.h @@ -198,6 +198,7 @@ struct sdcardfs_mount_options { bool gid_derivation; bool default_normal; unsigned int reserved_mb; + bool nocache; }; struct sdcardfs_vfsmount_options { diff --git a/fs/sdcardfs/super.c b/fs/sdcardfs/super.c index cffcdb11cb8a..140696ed3ed3 100644 --- a/fs/sdcardfs/super.c +++ b/fs/sdcardfs/super.c @@ -311,6 +311,8 @@ static int sdcardfs_show_options(struct vfsmount *mnt, struct seq_file *m, seq_puts(m, ",default_normal"); if (opts->reserved_mb != 0) seq_printf(m, ",reserved=%uMB", opts->reserved_mb); + if (opts->nocache) + seq_printf(m, ",nocache"); return 0; }; From 64102d341c131958f108b1fc397f16953803f53c Mon Sep 17 00:00:00 2001 From: Benedict Wong Date: Wed, 5 Sep 2018 11:00:08 -0700 Subject: [PATCH 073/189] BACKPORT: xfrm: Allow Output Mark to be Updated Using UPDSA Allow UPDSA to change "output mark" to permit policy separation of packet routing decisions from SA keying in systems that use mark-based routing. The set mark, used as a routing and firewall mark for outbound packets, is made update-able which allows routing decisions to be handled independently of keying/SA creation. To maintain consistency with other optional attributes, the output mark is only updated if sent with a non-zero value. The per-SA lock and the xfrm_state_lock are taken in that order to avoid a deadlock with xfrm_timer_handler(), which also takes the locks in that order. Signed-off-by: Nathan Harold Signed-off-by: Steffen Klassert (cherry picked from commit 6d8e85ffe17895d7bc632dfbaa9e2e33b22fe873) Backport resolution required using props.output_mark instead of props.smark Change-Id: I08c7bfc114ac9826a8a18f5ac1c3ff17a4e0940b Signed-off-by: Benedict Wong Bug: 114060045 --- net/xfrm/xfrm_state.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 9b6e51450fc5..99e013f3a88d 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1357,6 +1357,15 @@ out: if (x1->curlft.use_time) xfrm_state_check_expire(x1); + if (x->props.output_mark) { + spin_lock_bh(&net->xfrm.xfrm_state_lock); + + x1->props.output_mark = x->props.output_mark; + + __xfrm_state_bump_genids(x1); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); + } + err = 0; x->km.state = XFRM_STATE_DEAD; __xfrm_state_put(x); From 5ce93bd4cc02ef3c1b25587ce65eaa84f6b4172a Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 1 Aug 2018 13:45:11 +0200 Subject: [PATCH 074/189] xfrm: Validate address prefix lengths in the xfrm selector. [ Upstream commit 07bf7908950a8b14e81aa1807e3c667eab39287a ] We don't validate the address prefix lengths in the xfrm selector we got from userspace. This can lead to undefined behaviour in the address matching functions if the prefix is too big for the given address family. Fix this by checking the prefixes and refuse SA/policy insertation when a prefix is invalid. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Air Icy Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/xfrm/xfrm_user.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index a9b4491a3cc4..c2e98dcba9fe 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -151,10 +151,16 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, err = -EINVAL; switch (p->family) { case AF_INET: + if (p->sel.prefixlen_d > 32 || p->sel.prefixlen_s > 32) + goto out; + break; case AF_INET6: #if IS_ENABLED(CONFIG_IPV6) + if (p->sel.prefixlen_d > 128 || p->sel.prefixlen_s > 128) + goto out; + break; #else err = -EAFNOSUPPORT; @@ -1312,10 +1318,16 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) switch (p->sel.family) { case AF_INET: + if (p->sel.prefixlen_d > 32 || p->sel.prefixlen_s > 32) + return -EINVAL; + break; case AF_INET6: #if IS_ENABLED(CONFIG_IPV6) + if (p->sel.prefixlen_d > 128 || p->sel.prefixlen_s > 128) + return -EINVAL; + break; #else return -EAFNOSUPPORT; From 4e16c61e87a1c3fc79253fc07cf043533b33b661 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 31 Aug 2018 08:38:49 -0300 Subject: [PATCH 075/189] xfrm6: call kfree_skb when skb is toobig [ Upstream commit 215ab0f021c9fea3c18b75e7d522400ee6a49990 ] After commit d6990976af7c5d8f55903bfb4289b6fb030bf754 ("vti6: fix PMTU caching and reporting on xmit"), some too big skbs might be potentially passed down to __xfrm6_output, causing it to fail to transmit but not free the skb, causing a leak of skb, and consequentially a leak of dst references. After running pmtu.sh, that shows as failure to unregister devices in a namespace: [ 311.397671] unregister_netdevice: waiting for veth_b to become free. Usage count = 1 The fix is to call kfree_skb in case of transmit failures. Fixes: dd767856a36e ("xfrm6: Don't call icmpv6_send on local error") Signed-off-by: Thadeu Lima de Souza Cascardo Reviewed-by: Sabrina Dubroca Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/ipv6/xfrm6_output.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 4d09ce6fa90e..64862c5084ee 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -165,9 +165,11 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) if (toobig && xfrm6_local_dontfrag(skb)) { xfrm6_local_rxpmtu(skb, mtu); + kfree_skb(skb); return -EMSGSIZE; } else if (!skb->ignore_df && toobig && skb->sk) { xfrm_local_error(skb, mtu); + kfree_skb(skb); return -EMSGSIZE; } From 7402bc9ca983a6a4c1545a4048e8b1a859b882ba Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Wed, 5 Sep 2018 08:06:13 +0300 Subject: [PATCH 076/189] mac80211: Always report TX status [ Upstream commit 8682250b3c1b75a45feb7452bc413d004cfe3778 ] If a frame is dropped for any reason, mac80211 wouldn't report the TX status back to user space. As the user space may rely on the TX_STATUS to kick its state machines, resends etc, it's better to just report this frame as not acked instead. Signed-off-by: Andrei Otcheretianski Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/mac80211/status.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 45fb1abdb265..2731cf5bf052 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -466,11 +466,6 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local, if (!skb) return; - if (dropped) { - dev_kfree_skb_any(skb); - return; - } - if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) { u64 cookie = IEEE80211_SKB_CB(skb)->ack.cookie; struct ieee80211_sub_if_data *sdata; @@ -491,6 +486,8 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local, } rcu_read_unlock(); + dev_kfree_skb_any(skb); + } else if (dropped) { dev_kfree_skb_any(skb); } else { /* consumes skb */ From db420bc4b798c1a5c91ce87e40011c89c118aef7 Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Wed, 5 Sep 2018 08:06:12 +0300 Subject: [PATCH 077/189] cfg80211: reg: Init wiphy_idx in regulatory_hint_core() [ Upstream commit 24f33e64fcd0d50a4b1a8e5b41bd0257aa66b0e8 ] Core regulatory hints didn't set wiphy_idx to WIPHY_IDX_INVALID. Since the regulatory request is zeroed, wiphy_idx was always implicitly set to 0. This resulted in updating only phy #0. Fix that. Fixes: 806a9e39670b ("cfg80211: make regulatory_request use wiphy_idx instead of wiphy") Signed-off-by: Andrei Otcheretianski Signed-off-by: Luca Coelho [add fixes tag] Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/reg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 06d050da0d94..50dffd183cc6 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2367,6 +2367,7 @@ static int regulatory_hint_core(const char *alpha2) request->alpha2[0] = alpha2[0]; request->alpha2[1] = alpha2[1]; request->initiator = NL80211_REGDOM_SET_BY_CORE; + request->wiphy_idx = WIPHY_IDX_INVALID; queue_regulatory_request(request); From f4c24fd3b6919a7fb0fa5e858381299e593daf44 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Thu, 13 Sep 2018 16:48:08 +0100 Subject: [PATCH 078/189] ARM: 8799/1: mm: fix pci_ioremap_io() offset check [ Upstream commit 3a58ac65e2d7969bcdf1b6acb70fa4d12a88e53e ] IO_SPACE_LIMIT is the ending address of the PCI IO space, i.e something like 0xfffff (and not 0x100000). Therefore, when offset = 0xf0000 is passed as argument, this function fails even though the offset + SZ_64K fits below the IO_SPACE_LIMIT. This makes the last chunk of 64 KB of the I/O space not usable as it cannot be mapped. This patch fixes that by substracing 1 to offset + SZ_64K, so that we compare the addrss of the last byte of the I/O space against IO_SPACE_LIMIT instead of the address of the first byte of what is after the I/O space. Fixes: c2794437091a4 ("ARM: Add fixed PCI i/o mapping") Signed-off-by: Thomas Petazzoni Acked-by: Nicolas Pitre Signed-off-by: Russell King Signed-off-by: Sasha Levin --- arch/arm/mm/ioremap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 0c81056c1dd7..2a3feb73de0b 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -460,7 +460,7 @@ void pci_ioremap_set_mem_type(int mem_type) int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr) { - BUG_ON(offset + SZ_64K > IO_SPACE_LIMIT); + BUG_ON(offset + SZ_64K - 1 > IO_SPACE_LIMIT); return ioremap_page_range(PCI_IO_VIRT_BASE + offset, PCI_IO_VIRT_BASE + offset + SZ_64K, From 5217bec5a6e7bd3c95d1229fc6029d956a470060 Mon Sep 17 00:00:00 2001 From: Sean Tranchetti Date: Wed, 19 Sep 2018 13:54:56 -0600 Subject: [PATCH 079/189] xfrm: validate template mode [ Upstream commit 32bf94fb5c2ec4ec842152d0e5937cd4bb6738fa ] XFRM mode parameters passed as part of the user templates in the IP_XFRM_POLICY are never properly validated. Passing values other than valid XFRM modes can cause stack-out-of-bounds reads to occur later in the XFRM processing: [ 140.535608] ================================================================ [ 140.543058] BUG: KASAN: stack-out-of-bounds in xfrm_state_find+0x17e4/0x1cc4 [ 140.550306] Read of size 4 at addr ffffffc0238a7a58 by task repro/5148 [ 140.557369] [ 140.558927] Call trace: [ 140.558936] dump_backtrace+0x0/0x388 [ 140.558940] show_stack+0x24/0x30 [ 140.558946] __dump_stack+0x24/0x2c [ 140.558949] dump_stack+0x8c/0xd0 [ 140.558956] print_address_description+0x74/0x234 [ 140.558960] kasan_report+0x240/0x264 [ 140.558963] __asan_report_load4_noabort+0x2c/0x38 [ 140.558967] xfrm_state_find+0x17e4/0x1cc4 [ 140.558971] xfrm_resolve_and_create_bundle+0x40c/0x1fb8 [ 140.558975] xfrm_lookup+0x238/0x1444 [ 140.558977] xfrm_lookup_route+0x48/0x11c [ 140.558984] ip_route_output_flow+0x88/0xc4 [ 140.558991] raw_sendmsg+0xa74/0x266c [ 140.558996] inet_sendmsg+0x258/0x3b0 [ 140.559002] sock_sendmsg+0xbc/0xec [ 140.559005] SyS_sendto+0x3a8/0x5a8 [ 140.559008] el0_svc_naked+0x34/0x38 [ 140.559009] [ 140.592245] page dumped because: kasan: bad access detected [ 140.597981] page_owner info is not active (free page?) [ 140.603267] [ 140.653503] ================================================================ Signed-off-by: Sean Tranchetti Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/xfrm/xfrm_user.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index c2e98dcba9fe..476f1fc6d655 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1408,6 +1408,9 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) (ut[i].family != prev_family)) return -EINVAL; + if (ut[i].mode >= XFRM_MODE_MAX) + return -EINVAL; + prev_family = ut[i].family; switch (ut[i].family) { From 6879c047ae6e9b6b43e54e8a34bebb7a52467198 Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Tue, 25 Sep 2018 09:51:02 +0200 Subject: [PATCH 080/189] mac80211_hwsim: do not omit multicast announce of first added radio [ Upstream commit 28ef8b49a338dc1844e86b7954cfffc7dfa2660a ] The allocation of hwsim radio identifiers uses a post-increment from 0, so the first radio has idx 0. This idx is explicitly excluded from multicast announcements ever since, but it is unclear why. Drop that idx check and announce the first radio as well. This makes userspace happy if it relies on these events. Signed-off-by: Martin Willi Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/mac80211_hwsim.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index c98cb962b454..05413176a5d6 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -2547,8 +2547,7 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, list_add_tail(&data->list, &hwsim_radios); spin_unlock_bh(&hwsim_radio_lock); - if (idx > 0) - hwsim_mcast_new_radio(idx, info, param); + hwsim_mcast_new_radio(idx, info, param); return idx; From b0c52fbff860b5dd23df44fd36f48e6a52ba8bfa Mon Sep 17 00:00:00 2001 From: Matias Karhumaa Date: Wed, 26 Sep 2018 09:13:46 +0300 Subject: [PATCH 081/189] Bluetooth: SMP: fix crash in unpairing [ Upstream commit cb28c306b93b71f2741ce1a5a66289db26715f4d ] In case unpair_device() was called through mgmt interface at the same time when pairing was in progress, Bluetooth kernel module crash was seen. [ 600.351225] general protection fault: 0000 [#1] SMP PTI [ 600.351235] CPU: 1 PID: 11096 Comm: btmgmt Tainted: G OE 4.19.0-rc1+ #1 [ 600.351238] Hardware name: Dell Inc. Latitude E5440/08RCYC, BIOS A18 05/14/2017 [ 600.351272] RIP: 0010:smp_chan_destroy.isra.10+0xce/0x2c0 [bluetooth] [ 600.351276] Code: c0 0f 84 b4 01 00 00 80 78 28 04 0f 84 53 01 00 00 4d 85 ed 0f 85 ab 00 00 00 48 8b 08 48 8b 50 08 be 10 00 00 00 48 89 51 08 <48> 89 0a 48 b9 00 02 00 00 00 00 ad de 48 89 48 08 48 8b 83 00 01 [ 600.351279] RSP: 0018:ffffa9be839b3b50 EFLAGS: 00010246 [ 600.351282] RAX: ffff9c999ac565a0 RBX: ffff9c9996e98c00 RCX: ffff9c999aa28b60 [ 600.351285] RDX: dead000000000200 RSI: 0000000000000010 RDI: ffff9c999e403500 [ 600.351287] RBP: ffffa9be839b3b70 R08: 0000000000000000 R09: ffffffff92a25c00 [ 600.351290] R10: ffffa9be839b3ae8 R11: 0000000000000001 R12: ffff9c995375b800 [ 600.351292] R13: 0000000000000000 R14: ffff9c99619a5000 R15: ffff9c9962a01c00 [ 600.351295] FS: 00007fb2be27c700(0000) GS:ffff9c999e880000(0000) knlGS:0000000000000000 [ 600.351298] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 600.351300] CR2: 00007fb2bdadbad0 CR3: 000000041c328001 CR4: 00000000001606e0 [ 600.351302] Call Trace: [ 600.351325] smp_failure+0x4f/0x70 [bluetooth] [ 600.351345] smp_cancel_pairing+0x74/0x80 [bluetooth] [ 600.351370] unpair_device+0x1c1/0x330 [bluetooth] [ 600.351399] hci_sock_sendmsg+0x960/0x9f0 [bluetooth] [ 600.351409] ? apparmor_socket_sendmsg+0x1e/0x20 [ 600.351417] sock_sendmsg+0x3e/0x50 [ 600.351422] sock_write_iter+0x85/0xf0 [ 600.351429] do_iter_readv_writev+0x12b/0x1b0 [ 600.351434] do_iter_write+0x87/0x1a0 [ 600.351439] vfs_writev+0x98/0x110 [ 600.351443] ? ep_poll+0x16d/0x3d0 [ 600.351447] ? ep_modify+0x73/0x170 [ 600.351451] do_writev+0x61/0xf0 [ 600.351455] ? do_writev+0x61/0xf0 [ 600.351460] __x64_sys_writev+0x1c/0x20 [ 600.351465] do_syscall_64+0x5a/0x110 [ 600.351471] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 600.351474] RIP: 0033:0x7fb2bdb62fe0 [ 600.351477] Code: 73 01 c3 48 8b 0d b8 6e 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d 69 c7 2c 00 00 75 10 b8 14 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 de 80 01 00 48 89 04 24 [ 600.351479] RSP: 002b:00007ffe062cb8f8 EFLAGS: 00000246 ORIG_RAX: 0000000000000014 [ 600.351484] RAX: ffffffffffffffda RBX: 000000000255b3d0 RCX: 00007fb2bdb62fe0 [ 600.351487] RDX: 0000000000000001 RSI: 00007ffe062cb920 RDI: 0000000000000004 [ 600.351490] RBP: 00007ffe062cb920 R08: 000000000255bd80 R09: 0000000000000000 [ 600.351494] R10: 0000000000000353 R11: 0000000000000246 R12: 0000000000000001 [ 600.351497] R13: 00007ffe062cbbe0 R14: 0000000000000000 R15: 0000000000000000 [ 600.351501] Modules linked in: algif_hash algif_skcipher af_alg cmac ipt_MASQUERADE nf_conntrack_netlink nfnetlink xfrm_user xfrm_algo iptable_nat nf_nat_ipv4 xt_addrtype iptable_filter ip_tables xt_conntrack x_tables nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 libcrc32c br_netfilter bridge stp llc overlay arc4 nls_iso8859_1 dm_crypt intel_rapl x86_pkg_temp_thermal intel_powerclamp coretemp dell_laptop kvm_intel crct10dif_pclmul dell_smm_hwmon crc32_pclmul ghash_clmulni_intel pcbc aesni_intel aes_x86_64 crypto_simd cryptd glue_helper intel_cstate intel_rapl_perf uvcvideo videobuf2_vmalloc videobuf2_memops videobuf2_v4l2 videobuf2_common videodev media hid_multitouch input_leds joydev serio_raw dell_wmi snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic dell_smbios dcdbas sparse_keymap [ 600.351569] snd_hda_intel btusb snd_hda_codec btrtl btbcm btintel snd_hda_core bluetooth(OE) snd_hwdep snd_pcm iwlmvm ecdh_generic wmi_bmof dell_wmi_descriptor snd_seq_midi mac80211 snd_seq_midi_event lpc_ich iwlwifi snd_rawmidi snd_seq snd_seq_device snd_timer cfg80211 snd soundcore mei_me mei dell_rbtn dell_smo8800 mac_hid parport_pc ppdev lp parport autofs4 hid_generic usbhid hid i915 nouveau kvmgt vfio_mdev mdev vfio_iommu_type1 vfio kvm irqbypass i2c_algo_bit ttm drm_kms_helper syscopyarea sysfillrect sysimgblt mxm_wmi psmouse ahci sdhci_pci cqhci libahci fb_sys_fops sdhci drm e1000e video wmi [ 600.351637] ---[ end trace e49e9f1df09c94fb ]--- [ 600.351664] RIP: 0010:smp_chan_destroy.isra.10+0xce/0x2c0 [bluetooth] [ 600.351666] Code: c0 0f 84 b4 01 00 00 80 78 28 04 0f 84 53 01 00 00 4d 85 ed 0f 85 ab 00 00 00 48 8b 08 48 8b 50 08 be 10 00 00 00 48 89 51 08 <48> 89 0a 48 b9 00 02 00 00 00 00 ad de 48 89 48 08 48 8b 83 00 01 [ 600.351669] RSP: 0018:ffffa9be839b3b50 EFLAGS: 00010246 [ 600.351672] RAX: ffff9c999ac565a0 RBX: ffff9c9996e98c00 RCX: ffff9c999aa28b60 [ 600.351674] RDX: dead000000000200 RSI: 0000000000000010 RDI: ffff9c999e403500 [ 600.351676] RBP: ffffa9be839b3b70 R08: 0000000000000000 R09: ffffffff92a25c00 [ 600.351679] R10: ffffa9be839b3ae8 R11: 0000000000000001 R12: ffff9c995375b800 [ 600.351681] R13: 0000000000000000 R14: ffff9c99619a5000 R15: ffff9c9962a01c00 [ 600.351684] FS: 00007fb2be27c700(0000) GS:ffff9c999e880000(0000) knlGS:0000000000000000 [ 600.351686] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 600.351689] CR2: 00007fb2bdadbad0 CR3: 000000041c328001 CR4: 00000000001606e0 Crash happened because list_del_rcu() was called twice for smp->ltk. This was possible if unpair_device was called right after ltk was generated but before keys were distributed. In this commit smp_cancel_pairing was refactored to cancel pairing if it is in progress and otherwise just removes keys. Once keys are removed from rcu list, pointers to smp context's keys are set to NULL to make sure removed list items are not accessed later. This commit also adjusts the functionality of mgmt unpair_device() little bit. Previously pairing was canceled only if pairing was in state that keys were already generated. With this commit unpair_device() cancels pairing already in earlier states. Bug was found by fuzzing kernel SMP implementation using Synopsys Defensics. Reported-by: Pekka Oikarainen Signed-off-by: Matias Karhumaa Signed-off-by: Johan Hedberg Signed-off-by: Sasha Levin --- net/bluetooth/mgmt.c | 7 ++----- net/bluetooth/smp.c | 29 +++++++++++++++++++++++++---- net/bluetooth/smp.h | 3 ++- 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index b1b0a1c0bd8d..ecc3da6a14a1 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3083,9 +3083,8 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, /* LE address type */ addr_type = le_addr_type(cp->addr.type); - hci_remove_irk(hdev, &cp->addr.bdaddr, addr_type); - - err = hci_remove_ltk(hdev, &cp->addr.bdaddr, addr_type); + /* Abort any ongoing SMP pairing. Removes ltk and irk if they exist. */ + err = smp_cancel_and_remove_pairing(hdev, &cp->addr.bdaddr, addr_type); if (err < 0) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, MGMT_STATUS_NOT_PAIRED, &rp, @@ -3099,8 +3098,6 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, goto done; } - /* Abort any ongoing SMP pairing */ - smp_cancel_pairing(conn); /* Defer clearing up the connection parameters until closing to * give a chance of keeping them if a repairing happens. diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 0dc27d2e8f18..bedfaef2c59e 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -2371,30 +2371,51 @@ unlock: return ret; } -void smp_cancel_pairing(struct hci_conn *hcon) +int smp_cancel_and_remove_pairing(struct hci_dev *hdev, bdaddr_t *bdaddr, + u8 addr_type) { - struct l2cap_conn *conn = hcon->l2cap_data; + struct hci_conn *hcon; + struct l2cap_conn *conn; struct l2cap_chan *chan; struct smp_chan *smp; + int err; + err = hci_remove_ltk(hdev, bdaddr, addr_type); + hci_remove_irk(hdev, bdaddr, addr_type); + + hcon = hci_conn_hash_lookup_le(hdev, bdaddr, addr_type); + if (!hcon) + goto done; + + conn = hcon->l2cap_data; if (!conn) - return; + goto done; chan = conn->smp; if (!chan) - return; + goto done; l2cap_chan_lock(chan); smp = chan->data; if (smp) { + /* Set keys to NULL to make sure smp_failure() does not try to + * remove and free already invalidated rcu list entries. */ + smp->ltk = NULL; + smp->slave_ltk = NULL; + smp->remote_irk = NULL; + if (test_bit(SMP_FLAG_COMPLETE, &smp->flags)) smp_failure(conn, 0); else smp_failure(conn, SMP_UNSPECIFIED); + err = 0; } l2cap_chan_unlock(chan); + +done: + return err; } static int smp_cmd_encrypt_info(struct l2cap_conn *conn, struct sk_buff *skb) diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h index ffcc70b6b199..993cbd7bcfe7 100644 --- a/net/bluetooth/smp.h +++ b/net/bluetooth/smp.h @@ -180,7 +180,8 @@ enum smp_key_pref { }; /* SMP Commands */ -void smp_cancel_pairing(struct hci_conn *hcon); +int smp_cancel_and_remove_pairing(struct hci_dev *hdev, bdaddr_t *bdaddr, + u8 addr_type); bool smp_sufficient_security(struct hci_conn *hcon, u8 sec_level, enum smp_key_pref key_pref); int smp_conn_security(struct hci_conn *hcon, __u8 sec_level); From df2d090bc5df33e92b43b2134d118fc60e2e0c97 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Wed, 26 Sep 2018 18:11:22 +0200 Subject: [PATCH 082/189] pxa168fb: prepare the clock [ Upstream commit d85536cde91fcfed6fb8d983783bd2b92c843939 ] Add missing prepare/unprepare operations for fbi->clk, this fixes following kernel warning: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 1 at drivers/clk/clk.c:874 clk_core_enable+0x2c/0x1b0 Enabling unprepared disp0_clk Modules linked in: CPU: 0 PID: 1 Comm: swapper Not tainted 4.18.0-rc8-00032-g02b43ddd4f21-dirty #25 Hardware name: Marvell MMP2 (Device Tree Support) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (__warn+0xd8/0xf0) [] (__warn) from [] (warn_slowpath_fmt+0x44/0x6c) [] (warn_slowpath_fmt) from [] (clk_core_enable+0x2c/0x1b0) [] (clk_core_enable) from [] (clk_core_enable_lock+0x18/0x2c) [] (clk_core_enable_lock) from [] (pxa168fb_probe+0x464/0x6ac) [] (pxa168fb_probe) from [] (platform_drv_probe+0x48/0x94) [] (platform_drv_probe) from [] (driver_probe_device+0x328/0x470) [] (driver_probe_device) from [] (__driver_attach+0xb0/0x124) [] (__driver_attach) from [] (bus_for_each_dev+0x64/0xa0) [] (bus_for_each_dev) from [] (bus_add_driver+0x1b8/0x230) [] (bus_add_driver) from [] (driver_register+0xac/0xf0) [] (driver_register) from [] (do_one_initcall+0xb8/0x1f0) [] (do_one_initcall) from [] (kernel_init_freeable+0x294/0x2e0) [] (kernel_init_freeable) from [] (kernel_init+0x8/0x10c) [] (kernel_init) from [] (ret_from_fork+0x14/0x2c) Exception stack(0xd008bfb0 to 0xd008bff8) bfa0: 00000000 00000000 00000000 00000000 bfc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 bfe0: 00000000 00000000 00000000 00000000 00000013 00000000 ---[ end trace c0af40f9e2ed7cb4 ]--- Signed-off-by: Lubomir Rintel [b.zolnierkie: enhance patch description a bit] Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Sasha Levin --- drivers/video/fbdev/pxa168fb.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/video/fbdev/pxa168fb.c b/drivers/video/fbdev/pxa168fb.c index efb57c059997..5190b1749e2a 100644 --- a/drivers/video/fbdev/pxa168fb.c +++ b/drivers/video/fbdev/pxa168fb.c @@ -712,7 +712,7 @@ static int pxa168fb_probe(struct platform_device *pdev) /* * enable controller clock */ - clk_enable(fbi->clk); + clk_prepare_enable(fbi->clk); pxa168fb_set_par(info); @@ -767,7 +767,7 @@ static int pxa168fb_probe(struct platform_device *pdev) failed_free_cmap: fb_dealloc_cmap(&info->cmap); failed_free_clk: - clk_disable(fbi->clk); + clk_disable_unprepare(fbi->clk); failed_free_fbmem: dma_free_coherent(fbi->dev, info->fix.smem_len, info->screen_base, fbi->fb_start_dma); @@ -807,7 +807,7 @@ static int pxa168fb_remove(struct platform_device *pdev) dma_free_writecombine(fbi->dev, PAGE_ALIGN(info->fix.smem_len), info->screen_base, info->fix.smem_start); - clk_disable(fbi->clk); + clk_disable_unprepare(fbi->clk); framebuffer_release(info); From 644d1918531ba9d6efa74e2228df8f0dd7002b53 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:50 -0700 Subject: [PATCH 083/189] asix: Check for supported Wake-on-LAN modes [ Upstream commit c4ce446e33d7a0e978256ac6fea4c80e59d9de5f ] The driver currently silently accepts unsupported Wake-on-LAN modes (other than WAKE_PHY or WAKE_MAGIC) without reporting that to the user, which is confusing. Fixes: 2e55cc7210fe ("[PATCH] USB: usbnet (3/9) module for ASIX Ethernet adapters") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/asix_common.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index 7fbd8f044207..2092ef6431f2 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -449,6 +449,9 @@ int asix_set_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) struct usbnet *dev = netdev_priv(net); u8 opt = 0; + if (wolinfo->wolopts & ~(WAKE_PHY | WAKE_MAGIC)) + return -EINVAL; + if (wolinfo->wolopts & WAKE_PHY) opt |= AX_MONITOR_LINK; if (wolinfo->wolopts & WAKE_MAGIC) From f3d71a323f51d033929ed70b53bd4bca27c7da3c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:51 -0700 Subject: [PATCH 084/189] ax88179_178a: Check for supported Wake-on-LAN modes [ Upstream commit 5ba6b4aa9a410c5e2c6417df52b5e2118ea9b467 ] The driver currently silently accepts unsupported Wake-on-LAN modes (other than WAKE_PHY or WAKE_MAGIC) without reporting that to the user, which is confusing. Fixes: e2ca90c276e1 ("ax88179_178a: ASIX AX88179_178A USB 3.0/2.0 to gigabit ethernet adapter driver") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/ax88179_178a.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index e6338c16081a..e3f2e6098db4 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -566,6 +566,9 @@ ax88179_set_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) struct usbnet *dev = netdev_priv(net); u8 opt = 0; + if (wolinfo->wolopts & ~(WAKE_PHY | WAKE_MAGIC)) + return -EINVAL; + if (wolinfo->wolopts & WAKE_PHY) opt |= AX_MONITOR_MODE_RWLC; if (wolinfo->wolopts & WAKE_MAGIC) From 04d846cddb4a941b5dc5b49b677736eb0c9e8431 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:52 -0700 Subject: [PATCH 085/189] lan78xx: Check for supported Wake-on-LAN modes [ Upstream commit eb9ad088f96653a26b340f7c447c44cf023d5cdc ] The driver supports a fair amount of Wake-on-LAN modes, but is not checking that the user specified one that is supported. Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver") Signed-off-by: Florian Fainelli Reviewed-by: Woojung Huh Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/lan78xx.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 1aede726052c..45a6a7cae4bf 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1051,19 +1051,10 @@ static int lan78xx_set_wol(struct net_device *netdev, if (ret < 0) return ret; - pdata->wol = 0; - if (wol->wolopts & WAKE_UCAST) - pdata->wol |= WAKE_UCAST; - if (wol->wolopts & WAKE_MCAST) - pdata->wol |= WAKE_MCAST; - if (wol->wolopts & WAKE_BCAST) - pdata->wol |= WAKE_BCAST; - if (wol->wolopts & WAKE_MAGIC) - pdata->wol |= WAKE_MAGIC; - if (wol->wolopts & WAKE_PHY) - pdata->wol |= WAKE_PHY; - if (wol->wolopts & WAKE_ARP) - pdata->wol |= WAKE_ARP; + if (wol->wolopts & ~WAKE_ALL) + return -EINVAL; + + pdata->wol = wol->wolopts; device_set_wakeup_enable(&dev->udev->dev, (bool)wol->wolopts); From 24665cbd768ad4b923cce0347d9905db504048fd Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:53 -0700 Subject: [PATCH 086/189] sr9800: Check for supported Wake-on-LAN modes [ Upstream commit c5cb93e994ffb43b7b3b1ff10b9f928f54574a36 ] The driver currently silently accepts unsupported Wake-on-LAN modes (other than WAKE_PHY or WAKE_MAGIC) without reporting that to the user, which is confusing. Fixes: 19a38d8e0aa3 ("USB2NET : SR9800 : One chip USB2.0 USB2NET SR9800 Device Driver Support") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/sr9800.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/sr9800.c b/drivers/net/usb/sr9800.c index a50df0d8fb9a..004c955c1fd1 100644 --- a/drivers/net/usb/sr9800.c +++ b/drivers/net/usb/sr9800.c @@ -421,6 +421,9 @@ sr_set_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) struct usbnet *dev = netdev_priv(net); u8 opt = 0; + if (wolinfo->wolopts & ~(WAKE_PHY | WAKE_MAGIC)) + return -EINVAL; + if (wolinfo->wolopts & WAKE_PHY) opt |= SR_MONITOR_LINK; if (wolinfo->wolopts & WAKE_MAGIC) From 2bb181d8a8ec755e5f977fe8e74fc81e86f7af4d Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:54 -0700 Subject: [PATCH 087/189] r8152: Check for supported Wake-on-LAN Modes [ Upstream commit f2750df1548bd8a2b060eb609fc43ca82811af4c ] The driver does not check for Wake-on-LAN modes specified by an user, but will conditionally set the device as wake-up enabled or not based on that, which could be a very confusing user experience. Fixes: 21ff2e8976b1 ("r8152: support WOL") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/r8152.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 2bb336cb13ee..2d83689374bb 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3663,6 +3663,9 @@ static int rtl8152_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) if (!rtl_can_wakeup(tp)) return -EOPNOTSUPP; + if (wol->wolopts & ~WAKE_ANY) + return -EINVAL; + ret = usb_autopm_get_interface(tp->intf); if (ret < 0) goto out_set_wol; From 66e43f427d5d3ee003a3a5f38a8673b8c94e2b84 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:55 -0700 Subject: [PATCH 088/189] smsc75xx: Check for Wake-on-LAN modes [ Upstream commit 9c734b2769a73eea2e9e9767c0e0bf839ff23679 ] The driver does not check for Wake-on-LAN modes specified by an user, but will conditionally set the device as wake-up enabled or not based on that, which could be a very confusing user experience. Fixes: 6c636503260d ("smsc75xx: add wol magic packet support") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/smsc75xx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index 8dbe086e0a96..234febc6e1d9 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -728,6 +728,9 @@ static int smsc75xx_ethtool_set_wol(struct net_device *net, struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]); int ret; + if (wolinfo->wolopts & ~SUPPORTED_WAKE) + return -EINVAL; + pdata->wolopts = wolinfo->wolopts & SUPPORTED_WAKE; ret = device_set_wakeup_enable(&dev->udev->dev, pdata->wolopts); From 863fdd166500af2613aad12552cf4eb503845e47 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 28 Sep 2018 16:18:56 -0700 Subject: [PATCH 089/189] smsc95xx: Check for Wake-on-LAN modes [ Upstream commit c530c471ba37bdd9fe1c7185b01455c00ae606fb ] The driver does not check for Wake-on-LAN modes specified by an user, but will conditionally set the device as wake-up enabled or not based on that, which could be a very confusing user experience. Fixes: e0e474a83c18 ("smsc95xx: add wol magic packet support") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/smsc95xx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 66b3ab9f614e..7cee7777d13f 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -727,6 +727,9 @@ static int smsc95xx_ethtool_set_wol(struct net_device *net, struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]); int ret; + if (wolinfo->wolopts & ~SUPPORTED_WAKE) + return -EINVAL; + pdata->wolopts = wolinfo->wolopts & SUPPORTED_WAKE; ret = device_set_wakeup_enable(&dev->udev->dev, pdata->wolopts); From 4f0336a8a0084e3bbddf71af87869aa9c612669a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 23 Sep 2018 18:13:43 +0200 Subject: [PATCH 090/189] perf/ring_buffer: Prevent concurent ring buffer access [ Upstream commit cd6fb677ce7e460c25bdd66f689734102ec7d642 ] Some of the scheduling tracepoints allow the perf_tp_event code to write to ring buffer under different cpu than the code is running on. This results in corrupted ring buffer data demonstrated in following perf commands: # perf record -e 'sched:sched_switch,sched:sched_wakeup' perf bench sched messaging # Running 'sched/messaging' benchmark: # 20 sender and receiver processes per group # 10 groups == 400 processes run Total time: 0.383 [sec] [ perf record: Woken up 8 times to write data ] 0x42b890 [0]: failed to process type: -1765585640 [ perf record: Captured and wrote 4.825 MB perf.data (29669 samples) ] # perf report --stdio 0x42b890 [0]: failed to process type: -1765585640 The reason for the corruption are some of the scheduling tracepoints, that have __perf_task dfined and thus allow to store data to another cpu ring buffer: sched_waking sched_wakeup sched_wakeup_new sched_stat_wait sched_stat_sleep sched_stat_iowait sched_stat_blocked The perf_tp_event function first store samples for current cpu related events defined for tracepoint: hlist_for_each_entry_rcu(event, head, hlist_entry) perf_swevent_event(event, count, &data, regs); And then iterates events of the 'task' and store the sample for any task's event that passes tracepoint checks: ctx = rcu_dereference(task->perf_event_ctxp[perf_sw_context]); list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { if (event->attr.type != PERF_TYPE_TRACEPOINT) continue; if (event->attr.config != entry->type) continue; perf_swevent_event(event, count, &data, regs); } Above code can race with same code running on another cpu, ending up with 2 cpus trying to store under the same ring buffer, which is specifically not allowed. This patch prevents the problem, by allowing only events with the same current cpu to receive the event. NOTE: this requires the use of (per-task-)per-cpu buffers for this feature to work; perf-record does this. Signed-off-by: Jiri Olsa [peterz: small edits to Changelog] Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Andrew Vagin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: e6dab5ffab59 ("perf/trace: Add ability to set a target task for events") Link: http://lkml.kernel.org/r/20180923161343.GB15054@krava Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/events/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 990ac41d8a5f..330fcd1b1822 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7018,6 +7018,8 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, goto unlock; list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { + if (event->cpu != smp_processor_id()) + continue; if (event->attr.type != PERF_TYPE_TRACEPOINT) continue; if (event->attr.config != entry->type) From 6596d0ab8c602b6bb9f4a8423ddd7fc3acb2418f Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Fri, 5 Oct 2018 08:48:27 -0500 Subject: [PATCH 091/189] net: cxgb3_main: fix a missing-check bug [ Upstream commit 2c05d88818ab6571816b93edce4d53703870d7ae ] In cxgb_extension_ioctl(), the command of the ioctl is firstly copied from the user-space buffer 'useraddr' to 'cmd' and checked through the switch statement. If the command is not as expected, an error code EOPNOTSUPP is returned. In the following execution, i.e., the cases of the switch statement, the whole buffer of 'useraddr' is copied again to a specific data structure, according to what kind of command is requested. However, after the second copy, there is no re-check on the newly-copied command. Given that the buffer 'useraddr' is in the user space, a malicious user can race to change the command between the two copies. By doing so, the attacker can supply malicious data to the kernel and cause undefined behavior. This patch adds a re-check in each case of the switch statement if there is a second copy in that case, to re-check whether the command obtained in the second copy is the same as the one in the first copy. If not, an error code EINVAL is returned. Signed-off-by: Wenwen Wang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index 7ae8374bff13..3dd4c39640dc 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -2147,6 +2147,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EPERM; if (copy_from_user(&t, useraddr, sizeof(t))) return -EFAULT; + if (t.cmd != CHELSIO_SET_QSET_PARAMS) + return -EINVAL; if (t.qset_idx >= SGE_QSETS) return -EINVAL; if (!in_range(t.intr_lat, 0, M_NEWTIMER) || @@ -2246,6 +2248,9 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) if (copy_from_user(&t, useraddr, sizeof(t))) return -EFAULT; + if (t.cmd != CHELSIO_GET_QSET_PARAMS) + return -EINVAL; + /* Display qsets for all ports when offload enabled */ if (test_bit(OFFLOAD_DEVMAP_BIT, &adapter->open_device_map)) { q1 = 0; @@ -2291,6 +2296,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EBUSY; if (copy_from_user(&edata, useraddr, sizeof(edata))) return -EFAULT; + if (edata.cmd != CHELSIO_SET_QSET_NUM) + return -EINVAL; if (edata.val < 1 || (edata.val > 1 && !(adapter->flags & USING_MSIX))) return -EINVAL; @@ -2331,6 +2338,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EPERM; if (copy_from_user(&t, useraddr, sizeof(t))) return -EFAULT; + if (t.cmd != CHELSIO_LOAD_FW) + return -EINVAL; /* Check t.len sanity ? */ fw_data = memdup_user(useraddr + sizeof(t), t.len); if (IS_ERR(fw_data)) @@ -2354,6 +2363,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EBUSY; if (copy_from_user(&m, useraddr, sizeof(m))) return -EFAULT; + if (m.cmd != CHELSIO_SETMTUTAB) + return -EINVAL; if (m.nmtus != NMTUS) return -EINVAL; if (m.mtus[0] < 81) /* accommodate SACK */ @@ -2395,6 +2406,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EBUSY; if (copy_from_user(&m, useraddr, sizeof(m))) return -EFAULT; + if (m.cmd != CHELSIO_SET_PM) + return -EINVAL; if (!is_power_of_2(m.rx_pg_sz) || !is_power_of_2(m.tx_pg_sz)) return -EINVAL; /* not power of 2 */ @@ -2428,6 +2441,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EIO; /* need the memory controllers */ if (copy_from_user(&t, useraddr, sizeof(t))) return -EFAULT; + if (t.cmd != CHELSIO_GET_MEM) + return -EINVAL; if ((t.addr & 7) || (t.len & 7)) return -EINVAL; if (t.mem_id == MEM_CM) @@ -2480,6 +2495,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) return -EAGAIN; if (copy_from_user(&t, useraddr, sizeof(t))) return -EFAULT; + if (t.cmd != CHELSIO_SET_TRACE_FILTER) + return -EINVAL; tp = (const struct trace_params *)&t.sip; if (t.config_tx) From 9aae17f851e2828de33fa4330cf519397c9aa2bf Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 8 Jun 2017 14:48:03 +0100 Subject: [PATCH 092/189] KEYS: put keyring if install_session_keyring_to_cred() fails [ Upstream commit d636bd9f12a66ea3775c9fabbf3f8e118253467a ] In join_session_keyring(), if install_session_keyring_to_cred() were to fail, we would leak the keyring reference, just like in the bug fixed by commit 23567fd052a9 ("KEYS: Fix keyring ref leak in join_session_keyring()"). Fortunately this cannot happen currently, but we really should be more careful. Do this by adding and using a new error label at which the keyring reference is dropped. Signed-off-by: Eric Biggers Signed-off-by: David Howells Signed-off-by: James Morris Signed-off-by: Sasha Levin --- security/keys/process_keys.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index ac1d5b2b1626..a7095372701e 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -808,15 +808,14 @@ long join_session_keyring(const char *name) ret = PTR_ERR(keyring); goto error2; } else if (keyring == new->session_keyring) { - key_put(keyring); ret = 0; - goto error2; + goto error3; } /* we've got a keyring - now to install it */ ret = install_session_keyring_to_cred(new, keyring); if (ret < 0) - goto error2; + goto error3; commit_creds(new); mutex_unlock(&key_session_mutex); @@ -826,6 +825,8 @@ long join_session_keyring(const char *name) okay: return ret; +error3: + key_put(keyring); error2: mutex_unlock(&key_session_mutex); error: From 997a4944ce4fdfc5a0ce3adee367ca5daff48244 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 12 Aug 2016 07:48:21 +0200 Subject: [PATCH 093/189] ipv6: suppress sparse warnings in IP6_ECN_set_ce() [ Upstream commit c15c0ab12fd62f2b19181d05c62d24bc9fa55a42 ] Pass the correct type __wsum to csum_sub() and csum_add(). This doesn't really change anything since __wsum really *is* __be32, but removes the address space warnings from sparse. Cc: Eric Dumazet Fixes: 34ae6a1aa054 ("ipv6: update skb->csum when CE mark is propagated") Signed-off-by: Johannes Berg Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/inet_ecn.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index 0dc0a51da38f..dce2d586d9ce 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -128,7 +128,8 @@ static inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph) to = from | htonl(INET_ECN_CE << 20); *(__be32 *)iph = to; if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->csum = csum_add(csum_sub(skb->csum, from), to); + skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from), + (__force __wsum)to); return 1; } From ca4a744b1828d8f9c3b62ba84305b69361b515e6 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Tue, 2 Feb 2016 18:17:54 +0100 Subject: [PATCH 094/189] net: drop write-only stack variable [ Upstream commit 3575dbf2cbbc8e598f17ec441aed526dbea0e1bd ] Remove a write-only stack variable from unix_attach_fds(). This is a left-over from the security fix in: commit 712f4aad406bb1ed67f3f98d04c044191f0ff593 Author: willy tarreau Date: Sun Jan 10 07:54:56 2016 +0100 unix: properly account for FDs passed over unix sockets Signed-off-by: David Herrmann Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/unix/af_unix.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e05ec54ac53f..c6b1eec94911 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1531,7 +1531,6 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) { int i; unsigned char max_level = 0; - int unix_sock_count = 0; if (too_many_unix_fds(current)) return -ETOOMANYREFS; @@ -1539,11 +1538,9 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) for (i = scm->fp->count - 1; i >= 0; i--) { struct sock *sk = unix_get_socket(scm->fp->fp[i]); - if (sk) { - unix_sock_count++; + if (sk) max_level = max(max_level, unix_sk(sk)->recursion_level); - } } if (unlikely(max_level > MAX_RECURSION_LEVEL)) return -ETOOMANYREFS; From 6b3d1619c1c8968bf890589af3a2ceabb672cf84 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Thu, 18 Feb 2016 21:29:08 +0100 Subject: [PATCH 095/189] ser_gigaset: use container_of() instead of detour [ Upstream commit 8d2c3ab4445640957d136caa3629857d63544a2a ] The purpose of gigaset_device_release() is to kfree() the struct ser_cardstate that contains our struct device. This is done via a bit of a detour. First we make our struct device's driver_data point to the container of our struct ser_cardstate (which is a struct cardstate). In gigaset_device_release() we then retrieve that driver_data again. And after that we finally kfree() the struct ser_cardstate that was saved in the struct cardstate. All of this can be achieved much easier by using container_of() to get from our struct device to its container, struct ser_cardstate. Do so. Note that at the time the detour was implemented commit b8b2c7d845d5 ("base/platform: assert that dev_pm_domain callbacks are called unconditionally") had just entered the tree. That commit disconnected our platform_device and our platform_driver. These were reconnected again in v4.5-rc2 through commit 25cad69f21f5 ("base/platform: Fix platform drivers with no probe callback"). And one of the consequences of that fix was that it broke the detour via driver_data. That's because it made __device_release_driver() stop being a NOP for our struct device and actually do stuff again. One of the things it now does, is setting our driver_data to NULL. That, in turn, makes it impossible for gigaset_device_release() to get to our struct cardstate. Which has the net effect of leaking a struct ser_cardstate at every call of this driver's tty close() operation. So using container_of() has the additional benefit of actually working. Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Signed-off-by: Paul Bolle Acked-by: Tilman Schmidt Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/isdn/gigaset/ser-gigaset.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/isdn/gigaset/ser-gigaset.c b/drivers/isdn/gigaset/ser-gigaset.c index 74bf1a17ae7c..b90776ef56ec 100644 --- a/drivers/isdn/gigaset/ser-gigaset.c +++ b/drivers/isdn/gigaset/ser-gigaset.c @@ -373,13 +373,7 @@ static void gigaset_freecshw(struct cardstate *cs) static void gigaset_device_release(struct device *dev) { - struct cardstate *cs = dev_get_drvdata(dev); - - if (!cs) - return; - dev_set_drvdata(dev, NULL); - kfree(cs->hw.ser); - cs->hw.ser = NULL; + kfree(container_of(dev, struct ser_cardstate, dev.dev)); } /* @@ -408,7 +402,6 @@ static int gigaset_initcshw(struct cardstate *cs) cs->hw.ser = NULL; return rc; } - dev_set_drvdata(&cs->hw.ser->dev.dev, cs); tasklet_init(&cs->write_tasklet, gigaset_modem_fill, (unsigned long) cs); From 70b3d6c5aa7172eb2c86128ca19f038ae61a7fb7 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 23 Jun 2016 14:03:47 -0400 Subject: [PATCH 096/189] tracing: Skip more functions when doing stack tracing of events [ Upstream commit be54f69c26193de31053190761e521903b89d098 ] # echo 1 > options/stacktrace # echo 1 > events/sched/sched_switch/enable # cat trace -0 [002] d..2 1982.525169: => save_stack_trace => __ftrace_trace_stack => trace_buffer_unlock_commit_regs => event_trigger_unlock_commit => trace_event_buffer_commit => trace_event_raw_event_sched_switch => __schedule => schedule => schedule_preempt_disabled => cpu_startup_entry => start_secondary The above shows that we are seeing 6 functions before ever making it to the caller of the sched_switch event. # echo stacktrace > events/sched/sched_switch/trigger # cat trace -0 [002] d..3 2146.335208: => trace_event_buffer_commit => trace_event_raw_event_sched_switch => __schedule => schedule => schedule_preempt_disabled => cpu_startup_entry => start_secondary The stacktrace trigger isn't as bad, because it adds its own skip to the stacktracing, but still has two events extra. One issue is that if the stacktrace passes its own "regs" then there should be no addition to the skip, as the regs will not include the functions being called. This was an issue that was fixed by commit 7717c6be6999 ("tracing: Fix stacktrace skip depth in trace_buffer_unlock_commit_regs()" as adding the skip number for kprobes made the probes not have any stack at all. But since this is only an issue when regs is being used, a skip should be added if regs is NULL. Now we have: # echo 1 > options/stacktrace # echo 1 > events/sched/sched_switch/enable # cat trace -0 [000] d..2 1297.676333: => __schedule => schedule => schedule_preempt_disabled => cpu_startup_entry => rest_init => start_kernel => x86_64_start_reservations => x86_64_start_kernel # echo stacktrace > events/sched/sched_switch/trigger # cat trace -0 [002] d..3 1370.759745: => __schedule => schedule => schedule_preempt_disabled => cpu_startup_entry => start_secondary And kprobes are not touched. Reported-by: Peter Zijlstra Signed-off-by: Steven Rostedt Signed-off-by: Sasha Levin --- kernel/trace/trace.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e409ddce8754..1a47a64d623f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1757,7 +1757,17 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr, { __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(tr, buffer, flags, 0, pc, regs); + /* + * If regs is not set, then skip the following callers: + * trace_buffer_unlock_commit_regs + * event_trigger_unlock_commit + * trace_event_buffer_commit + * trace_event_raw_event_sched_switch + * Note, we can still get here via blktrace, wakeup tracer + * and mmiotrace, but that's ok if they lose a function or + * two. They are that meaningful. + */ + ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs); ftrace_trace_userstack(buffer, flags, pc); } EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs); @@ -1815,6 +1825,13 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer, trace.nr_entries = 0; trace.skip = skip; + /* + * Add two, for this function and the call to save_stack_trace() + * If regs is set, then these functions will not be in the way. + */ + if (!regs) + trace.skip += 2; + /* * Since events can happen in NMIs there's no safe way to * use the per cpu ftrace_stacks. We reserve it and if an interrupt From 011859fdb4764ccf46e6e36dbdfeb3442a17a71e Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Fri, 1 Apr 2016 08:52:58 +0100 Subject: [PATCH 097/189] ARM: dts: apq8064: add ahci ports-implemented mask [ Upstream commit bb4add2ce991e4ec891b5a0287fd1ab77b631979 ] This patch adds new ports-implemented mask, which is required to get achi working on the mainline. Without this patch value read from PORTS_IMPL register which is zero would not enable any ports for software to use. Fixes: 566d1827df2e ("libata: disable forced PORTS_IMPL for >= AHCI 1.3") Cc: stable@vger.kernel.org # v4.5+ Signed-off-by: Srinivas Kandagatla Reviewed-by: Andy Gross Signed-off-by: Tejun Heo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/qcom-apq8064.dtsi | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/qcom-apq8064.dtsi b/arch/arm/boot/dts/qcom-apq8064.dtsi index e00d50ef678f..3ff5ea16ebb3 100644 --- a/arch/arm/boot/dts/qcom-apq8064.dtsi +++ b/arch/arm/boot/dts/qcom-apq8064.dtsi @@ -577,7 +577,7 @@ }; sata0: sata@29000000 { - compatible = "generic-ahci"; + compatible = "qcom,apq8064-ahci", "generic-ahci"; status = "disabled"; reg = <0x29000000 0x180>; interrupts = ; @@ -599,6 +599,7 @@ phys = <&sata_phy0>; phy-names = "sata-phy"; + ports-implemented = <0x1>; }; /* Temporary fixed regulator */ From 9903f3abf2a3e088d485551726814e90a5c37f68 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Tue, 20 Sep 2016 14:26:21 +0100 Subject: [PATCH 098/189] x86/mm/pat: Prevent hang during boot when mapping pages [ Upstream commit e535ec0899d1fe52ec3a84c9bc03457ac67ad6f7 ] There's a mixture of signed 32-bit and unsigned 32-bit and 64-bit data types used for keeping track of how many pages have been mapped. This leads to hangs during boot when mapping large numbers of pages (multiple terabytes, as reported by Waiman) because those values are interpreted as being negative. commit 742563777e8d ("x86/mm/pat: Avoid truncation when converting cpa->numpages to address") fixed one of those bugs, but there is another lurking in __change_page_attr_set_clr(). Additionally, the return value type for the populate_*() functions can return negative values when a large number of pages have been mapped, triggering the error paths even though no error occurred. Consistently use 64-bit types on 64-bit platforms when counting pages. Even in the signed case this gives us room for regions 8PiB (pebibytes) in size whilst still allowing the usual negative value error checking idiom. Reported-by: Waiman Long Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Linus Torvalds CC: Theodore Ts'o Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: Scott J Norton Cc: Douglas Hatch Signed-off-by: Matt Fleming Signed-off-by: Sasha Levin --- arch/x86/mm/pageattr.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 0e1dd7d47f05..26598e08666c 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -955,11 +955,11 @@ static void populate_pte(struct cpa_data *cpa, } } -static int populate_pmd(struct cpa_data *cpa, - unsigned long start, unsigned long end, - unsigned num_pages, pud_t *pud, pgprot_t pgprot) +static long populate_pmd(struct cpa_data *cpa, + unsigned long start, unsigned long end, + unsigned num_pages, pud_t *pud, pgprot_t pgprot) { - unsigned int cur_pages = 0; + long cur_pages = 0; pmd_t *pmd; pgprot_t pmd_pgprot; @@ -1029,12 +1029,12 @@ static int populate_pmd(struct cpa_data *cpa, return num_pages; } -static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, - pgprot_t pgprot) +static long populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, + pgprot_t pgprot) { pud_t *pud; unsigned long end; - int cur_pages = 0; + long cur_pages = 0; pgprot_t pud_pgprot; end = start + (cpa->numpages << PAGE_SHIFT); @@ -1090,7 +1090,7 @@ static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, /* Map trailing leftover */ if (start < end) { - int tmp; + long tmp; pud = pud_offset(pgd, start); if (pud_none(*pud)) @@ -1116,7 +1116,7 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr) pgprot_t pgprot = __pgprot(_KERNPG_TABLE); pud_t *pud = NULL; /* shut up gcc */ pgd_t *pgd_entry; - int ret; + long ret; pgd_entry = cpa->pgd + pgd_index(addr); @@ -1351,7 +1351,8 @@ static int cpa_process_alias(struct cpa_data *cpa) static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) { - int ret, numpages = cpa->numpages; + unsigned long numpages = cpa->numpages; + int ret; while (numpages) { /* From 11eea056980680aa4a24870165da340f4b32d06d Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Wed, 20 Jul 2016 15:45:00 -0700 Subject: [PATCH 099/189] radix-tree: fix radix_tree_iter_retry() for tagged iterators. [ Upstream commit 3cb9185c67304b2a7ea9be73e7d13df6fb2793a1 ] radix_tree_iter_retry() resets slot to NULL, but it doesn't reset tags. Then NULL slot and non-zero iter.tags passed to radix_tree_next_slot() leading to crash: RIP: radix_tree_next_slot include/linux/radix-tree.h:473 find_get_pages_tag+0x334/0x930 mm/filemap.c:1452 .... Call Trace: pagevec_lookup_tag+0x3a/0x80 mm/swap.c:960 mpage_prepare_extent_to_map+0x321/0xa90 fs/ext4/inode.c:2516 ext4_writepages+0x10be/0x2b20 fs/ext4/inode.c:2736 do_writepages+0x97/0x100 mm/page-writeback.c:2364 __filemap_fdatawrite_range+0x248/0x2e0 mm/filemap.c:300 filemap_write_and_wait_range+0x121/0x1b0 mm/filemap.c:490 ext4_sync_file+0x34d/0xdb0 fs/ext4/fsync.c:115 vfs_fsync_range+0x10a/0x250 fs/sync.c:195 vfs_fsync fs/sync.c:209 do_fsync+0x42/0x70 fs/sync.c:219 SYSC_fdatasync fs/sync.c:232 SyS_fdatasync+0x19/0x20 fs/sync.c:230 entry_SYSCALL_64_fastpath+0x23/0xc1 arch/x86/entry/entry_64.S:207 We must reset iterator's tags to bail out from radix_tree_next_slot() and go to the slow-path in radix_tree_next_chunk(). Fixes: 46437f9a554f ("radix-tree: fix race in gang lookup") Link: http://lkml.kernel.org/r/1468495196-10604-1-git-send-email-aryabinin@virtuozzo.com Signed-off-by: Andrey Ryabinin Reported-by: Dmitry Vyukov Acked-by: Konstantin Khlebnikov Cc: Matthew Wilcox Cc: Hugh Dickins Cc: Ross Zwisler Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- include/linux/radix-tree.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 5d5174b59802..673dee29a9b9 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -382,6 +382,7 @@ static inline __must_check void **radix_tree_iter_retry(struct radix_tree_iter *iter) { iter->next_index = iter->index; + iter->tags = 0; return NULL; } From 80176161f4514af9064f75a52a5dece6084128b5 Mon Sep 17 00:00:00 2001 From: Mateusz Jurczyk Date: Fri, 23 Jun 2017 19:32:28 +0200 Subject: [PATCH 100/189] af_iucv: Move sockaddr length checks to before accessing sa_family in bind and connect handlers [ Upstream commit e3c42b61ff813921ba58cfc0019e3fd63f651190 ] Verify that the caller-provided sockaddr structure is large enough to contain the sa_family field, before accessing it in bind() and connect() handlers of the AF_IUCV socket. Since neither syscall enforces a minimum size of the corresponding memory region, very short sockaddrs (zero or one byte long) result in operating on uninitialized memory while referencing .sa_family. Fixes: 52a82e23b9f2 ("af_iucv: Validate socket address length in iucv_sock_bind()") Signed-off-by: Mateusz Jurczyk [jwi: removed unneeded null-check for addr] Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/iucv/af_iucv.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index aeffb65181f5..5984cc35d508 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -705,10 +705,8 @@ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr, char uid[9]; /* Verify the input sockaddr */ - if (!addr || addr->sa_family != AF_IUCV) - return -EINVAL; - - if (addr_len < sizeof(struct sockaddr_iucv)) + if (addr_len < sizeof(struct sockaddr_iucv) || + addr->sa_family != AF_IUCV) return -EINVAL; lock_sock(sk); @@ -852,7 +850,7 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, struct iucv_sock *iucv = iucv_sk(sk); int err; - if (addr->sa_family != AF_IUCV || alen < sizeof(struct sockaddr_iucv)) + if (alen < sizeof(struct sockaddr_iucv) || addr->sa_family != AF_IUCV) return -EINVAL; if (sk->sk_state != IUCV_OPEN && sk->sk_state != IUCV_BOUND) From 61918dbc7efef25294f32431869720d99772b4ca Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Thu, 27 Oct 2016 16:27:16 +0300 Subject: [PATCH 101/189] net/mlx4_en: Resolve dividing by zero in 32-bit system [ Upstream commit 4850cf4581578216468b7b3c3d06cc5abb0a697d ] When doing roundup_pow_of_two for large enough number with bit 31, an overflow will occur and a value equal to 1 will be returned. In this case 1 will be subtracted from the return value and division by zero will be reached. Fixes: 31c128b66e5b ("net/mlx4_en: Choose time-stamping shift value according to HW frequency") Signed-off-by: Eugenia Emantayev Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx4/en_clock.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c index 4dccf7287f0f..52e4ed2f639d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c @@ -251,8 +251,11 @@ static u32 freq_to_shift(u16 freq) { u32 freq_khz = freq * 1000; u64 max_val_cycles = freq_khz * 1000 * MLX4_EN_WRAP_AROUND_SEC; + u64 tmp_rounded = + roundup_pow_of_two(max_val_cycles) > max_val_cycles ? + roundup_pow_of_two(max_val_cycles) - 1 : UINT_MAX; u64 max_val_cycles_rounded = is_power_of_2(max_val_cycles + 1) ? - max_val_cycles : roundup_pow_of_two(max_val_cycles) - 1; + max_val_cycles : tmp_rounded; /* calculate max possible multiplier in order to fit in 64bit */ u64 max_mul = div_u64(0xffffffffffffffffULL, max_val_cycles_rounded); From 274337f8da92f188edb798e0fcdccb80618bcf79 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 1 Mar 2017 14:45:06 -0800 Subject: [PATCH 102/189] ipv6: orphan skbs in reassembly unit [ Upstream commit 48cac18ecf1de82f76259a54402c3adb7839ad01 ] Andrey reported a use-after-free in IPv6 stack. Issue here is that we free the socket while it still has skb in TX path and in some queues. It happens here because IPv6 reassembly unit messes skb->truesize, breaking skb_set_owner_w() badly. We fixed a similar issue for IPV4 in commit 8282f27449bf ("inet: frag: Always orphan skbs inside ip_defrag()") Acked-by: Joe Stringer ================================================================== BUG: KASAN: use-after-free in sock_wfree+0x118/0x120 Read of size 8 at addr ffff880062da0060 by task a.out/4140 page:ffffea00018b6800 count:1 mapcount:0 mapping: (null) index:0x0 compound_mapcount: 0 flags: 0x100000000008100(slab|head) raw: 0100000000008100 0000000000000000 0000000000000000 0000000180130013 raw: dead000000000100 dead000000000200 ffff88006741f140 0000000000000000 page dumped because: kasan: bad access detected CPU: 0 PID: 4140 Comm: a.out Not tainted 4.10.0-rc3+ #59 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:15 dump_stack+0x292/0x398 lib/dump_stack.c:51 describe_address mm/kasan/report.c:262 kasan_report_error+0x121/0x560 mm/kasan/report.c:370 kasan_report mm/kasan/report.c:392 __asan_report_load8_noabort+0x3e/0x40 mm/kasan/report.c:413 sock_flag ./arch/x86/include/asm/bitops.h:324 sock_wfree+0x118/0x120 net/core/sock.c:1631 skb_release_head_state+0xfc/0x250 net/core/skbuff.c:655 skb_release_all+0x15/0x60 net/core/skbuff.c:668 __kfree_skb+0x15/0x20 net/core/skbuff.c:684 kfree_skb+0x16e/0x4e0 net/core/skbuff.c:705 inet_frag_destroy+0x121/0x290 net/ipv4/inet_fragment.c:304 inet_frag_put ./include/net/inet_frag.h:133 nf_ct_frag6_gather+0x1125/0x38b0 net/ipv6/netfilter/nf_conntrack_reasm.c:617 ipv6_defrag+0x21b/0x350 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c:68 nf_hook_entry_hookfn ./include/linux/netfilter.h:102 nf_hook_slow+0xc3/0x290 net/netfilter/core.c:310 nf_hook ./include/linux/netfilter.h:212 __ip6_local_out+0x52c/0xaf0 net/ipv6/output_core.c:160 ip6_local_out+0x2d/0x170 net/ipv6/output_core.c:170 ip6_send_skb+0xa1/0x340 net/ipv6/ip6_output.c:1722 ip6_push_pending_frames+0xb3/0xe0 net/ipv6/ip6_output.c:1742 rawv6_push_pending_frames net/ipv6/raw.c:613 rawv6_sendmsg+0x2cff/0x4130 net/ipv6/raw.c:927 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744 sock_sendmsg_nosec net/socket.c:635 sock_sendmsg+0xca/0x110 net/socket.c:645 sock_write_iter+0x326/0x620 net/socket.c:848 new_sync_write fs/read_write.c:499 __vfs_write+0x483/0x760 fs/read_write.c:512 vfs_write+0x187/0x530 fs/read_write.c:560 SYSC_write fs/read_write.c:607 SyS_write+0xfb/0x230 fs/read_write.c:599 entry_SYSCALL_64_fastpath+0x1f/0xc2 arch/x86/entry/entry_64.S:203 RIP: 0033:0x7ff26e6f5b79 RSP: 002b:00007ff268e0ed98 EFLAGS: 00000206 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 00007ff268e0f9c0 RCX: 00007ff26e6f5b79 RDX: 0000000000000010 RSI: 0000000020f50fe1 RDI: 0000000000000003 RBP: 00007ff26ebc1220 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000206 R12: 0000000000000000 R13: 00007ff268e0f9c0 R14: 00007ff26efec040 R15: 0000000000000003 The buggy address belongs to the object at ffff880062da0000 which belongs to the cache RAWv6 of size 1504 The buggy address ffff880062da0060 is located 96 bytes inside of 1504-byte region [ffff880062da0000, ffff880062da05e0) Freed by task 4113: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57 save_stack+0x43/0xd0 mm/kasan/kasan.c:502 set_track mm/kasan/kasan.c:514 kasan_slab_free+0x73/0xc0 mm/kasan/kasan.c:578 slab_free_hook mm/slub.c:1352 slab_free_freelist_hook mm/slub.c:1374 slab_free mm/slub.c:2951 kmem_cache_free+0xb2/0x2c0 mm/slub.c:2973 sk_prot_free net/core/sock.c:1377 __sk_destruct+0x49c/0x6e0 net/core/sock.c:1452 sk_destruct+0x47/0x80 net/core/sock.c:1460 __sk_free+0x57/0x230 net/core/sock.c:1468 sk_free+0x23/0x30 net/core/sock.c:1479 sock_put ./include/net/sock.h:1638 sk_common_release+0x31e/0x4e0 net/core/sock.c:2782 rawv6_close+0x54/0x80 net/ipv6/raw.c:1214 inet_release+0xed/0x1c0 net/ipv4/af_inet.c:425 inet6_release+0x50/0x70 net/ipv6/af_inet6.c:431 sock_release+0x8d/0x1e0 net/socket.c:599 sock_close+0x16/0x20 net/socket.c:1063 __fput+0x332/0x7f0 fs/file_table.c:208 ____fput+0x15/0x20 fs/file_table.c:244 task_work_run+0x19b/0x270 kernel/task_work.c:116 exit_task_work ./include/linux/task_work.h:21 do_exit+0x186b/0x2800 kernel/exit.c:839 do_group_exit+0x149/0x420 kernel/exit.c:943 SYSC_exit_group kernel/exit.c:954 SyS_exit_group+0x1d/0x20 kernel/exit.c:952 entry_SYSCALL_64_fastpath+0x1f/0xc2 arch/x86/entry/entry_64.S:203 Allocated by task 4115: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57 save_stack+0x43/0xd0 mm/kasan/kasan.c:502 set_track mm/kasan/kasan.c:514 kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:605 kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:544 slab_post_alloc_hook mm/slab.h:432 slab_alloc_node mm/slub.c:2708 slab_alloc mm/slub.c:2716 kmem_cache_alloc+0x1af/0x250 mm/slub.c:2721 sk_prot_alloc+0x65/0x2a0 net/core/sock.c:1334 sk_alloc+0x105/0x1010 net/core/sock.c:1396 inet6_create+0x44d/0x1150 net/ipv6/af_inet6.c:183 __sock_create+0x4f6/0x880 net/socket.c:1199 sock_create net/socket.c:1239 SYSC_socket net/socket.c:1269 SyS_socket+0xf9/0x230 net/socket.c:1249 entry_SYSCALL_64_fastpath+0x1f/0xc2 arch/x86/entry/entry_64.S:203 Memory state around the buggy address: ffff880062d9ff00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff880062d9ff80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff880062da0000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff880062da0080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff880062da0100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Reported-by: Andrey Konovalov Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv6/netfilter/nf_conntrack_reasm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 838b65a59a73..5a9ae56e7868 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -601,6 +601,7 @@ struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 use hdr = ipv6_hdr(clone); fhdr = (struct frag_hdr *)skb_transport_header(clone); + skb_orphan(skb); fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr)); if (fq == NULL) { From 53025e7f5667cc7b5915ead628867fd1db983a76 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 23 May 2017 17:32:31 -0700 Subject: [PATCH 103/189] um: Avoid longjmp/setjmp symbol clashes with libpthread.a [ Upstream commit f44f1e7da7c8e3f4575d5d61c4df978496903fcc ] Building a statically linked UML kernel on a Centos 6.9 host resulted in the following linking failure (GCC 4.4, glibc-2.12): /usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../lib64/libpthread.a(libpthread.o): In function `siglongjmp': (.text+0x8490): multiple definition of `longjmp' arch/x86/um/built-in.o:/local/users/fainelli/openwrt/trunk/build_dir/target-x86_64_musl/linux-uml/linux-4.4.69/arch/x86/um/setjmp_64.S:44: first defined here /usr/lib/gcc/x86_64-redhat-linux/4.4.7/../../../../lib64/libpthread.a(libpthread.o): In function `sem_open': (.text+0x77cd): warning: the use of `mktemp' is dangerous, better use `mkstemp' collect2: ld returned 1 exit status make[4]: *** [vmlinux] Error 1 Adopt a solution similar to the one done for vmap where we define longjmp/setjmp to be kernel_longjmp/setjmp. In the process, make sure we do rename the functions in arch/x86/um/setjmp_*.S accordingly. Fixes: a7df4716d195 ("um: link with -lpthread") Signed-off-by: Florian Fainelli Signed-off-by: Richard Weinberger Signed-off-by: Sasha Levin --- arch/um/Makefile | 4 ++++ arch/x86/um/setjmp_32.S | 16 ++++++++-------- arch/x86/um/setjmp_64.S | 16 ++++++++-------- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/arch/um/Makefile b/arch/um/Makefile index 9ccf462131c4..d9cd7ed27834 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -59,10 +59,14 @@ KBUILD_CPPFLAGS += -I$(srctree)/$(HOST_DIR)/um # Same things for in6addr_loopback and mktime - found in libc. For these two we # only get link-time error, luckily. # +# -Dlongjmp=kernel_longjmp prevents anything from referencing the libpthread.a +# embedded copy of longjmp, same thing for setjmp. +# # These apply to USER_CFLAGS to. KBUILD_CFLAGS += $(CFLAGS) $(CFLAGS-y) -D__arch_um__ \ $(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap \ + -Dlongjmp=kernel_longjmp -Dsetjmp=kernel_setjmp \ -Din6addr_loopback=kernel_in6addr_loopback \ -Din6addr_any=kernel_in6addr_any -Dstrrchr=kernel_strrchr diff --git a/arch/x86/um/setjmp_32.S b/arch/x86/um/setjmp_32.S index b766792c9933..39053192918d 100644 --- a/arch/x86/um/setjmp_32.S +++ b/arch/x86/um/setjmp_32.S @@ -16,9 +16,9 @@ .text .align 4 - .globl setjmp - .type setjmp, @function -setjmp: + .globl kernel_setjmp + .type kernel_setjmp, @function +kernel_setjmp: #ifdef _REGPARM movl %eax,%edx #else @@ -35,13 +35,13 @@ setjmp: movl %ecx,20(%edx) # Return address ret - .size setjmp,.-setjmp + .size kernel_setjmp,.-kernel_setjmp .text .align 4 - .globl longjmp - .type longjmp, @function -longjmp: + .globl kernel_longjmp + .type kernel_longjmp, @function +kernel_longjmp: #ifdef _REGPARM xchgl %eax,%edx #else @@ -55,4 +55,4 @@ longjmp: movl 16(%edx),%edi jmp *20(%edx) - .size longjmp,.-longjmp + .size kernel_longjmp,.-kernel_longjmp diff --git a/arch/x86/um/setjmp_64.S b/arch/x86/um/setjmp_64.S index 45f547b4043e..c56942e1a38c 100644 --- a/arch/x86/um/setjmp_64.S +++ b/arch/x86/um/setjmp_64.S @@ -18,9 +18,9 @@ .text .align 4 - .globl setjmp - .type setjmp, @function -setjmp: + .globl kernel_setjmp + .type kernel_setjmp, @function +kernel_setjmp: pop %rsi # Return address, and adjust the stack xorl %eax,%eax # Return value movq %rbx,(%rdi) @@ -34,13 +34,13 @@ setjmp: movq %rsi,56(%rdi) # Return address ret - .size setjmp,.-setjmp + .size kernel_setjmp,.-kernel_setjmp .text .align 4 - .globl longjmp - .type longjmp, @function -longjmp: + .globl kernel_longjmp + .type kernel_longjmp, @function +kernel_longjmp: movl %esi,%eax # Return value (int) movq (%rdi),%rbx movq 8(%rdi),%rsp @@ -51,4 +51,4 @@ longjmp: movq 48(%rdi),%r15 jmp *56(%rdi) - .size longjmp,.-longjmp + .size kernel_longjmp,.-kernel_longjmp From 137b1ce318f0bd9847f68520cfd68c24e79ea258 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 21 Jan 2016 22:24:16 +0100 Subject: [PATCH 104/189] sched/cgroup: Fix cgroup entity load tracking tear-down [ Upstream commit 6fe1f348b3dd1f700f9630562b7d38afd6949568 ] When a cgroup's CPU runqueue is destroyed, it should remove its remaining load accounting from its parent cgroup. The current site for doing so it unsuited because its far too late and unordered against other cgroup removal (->css_free() will be, but we're also in an RCU callback). Put it in the ->css_offline() callback, which is the start of cgroup destruction, right after the group has been made unavailable to userspace. The ->css_offline() callbacks are called in hierarchical order after the following v4.4 commit: aa226ff4a1ce ("cgroup: make sure a parent css isn't offlined before its children") Signed-off-by: Peter Zijlstra (Intel) Cc: Christian Borntraeger Cc: Johannes Weiner Cc: Li Zefan Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Tejun Heo Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160121212416.GL6357@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/sched/core.c | 4 +--- kernel/sched/fair.c | 37 +++++++++++++++++++++---------------- kernel/sched/sched.h | 2 +- 3 files changed, 23 insertions(+), 20 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 65ed3501c2ca..4743e1f2a3d1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7817,11 +7817,9 @@ void sched_destroy_group(struct task_group *tg) void sched_offline_group(struct task_group *tg) { unsigned long flags; - int i; /* end participation in shares distribution */ - for_each_possible_cpu(i) - unregister_fair_sched_group(tg, i); + unregister_fair_sched_group(tg); spin_lock_irqsave(&task_group_lock, flags); list_del_rcu(&tg->list); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 3b136fb4422c..a0c5bb93a3ab 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -8154,11 +8154,8 @@ void free_fair_sched_group(struct task_group *tg) for_each_possible_cpu(i) { if (tg->cfs_rq) kfree(tg->cfs_rq[i]); - if (tg->se) { - if (tg->se[i]) - remove_entity_load_avg(tg->se[i]); + if (tg->se) kfree(tg->se[i]); - } } kfree(tg->cfs_rq); @@ -8206,21 +8203,29 @@ err: return 0; } -void unregister_fair_sched_group(struct task_group *tg, int cpu) +void unregister_fair_sched_group(struct task_group *tg) { - struct rq *rq = cpu_rq(cpu); unsigned long flags; + struct rq *rq; + int cpu; - /* - * Only empty task groups can be destroyed; so we can speculatively - * check on_list without danger of it being re-added. - */ - if (!tg->cfs_rq[cpu]->on_list) - return; + for_each_possible_cpu(cpu) { + if (tg->se[cpu]) + remove_entity_load_avg(tg->se[cpu]); - raw_spin_lock_irqsave(&rq->lock, flags); - list_del_leaf_cfs_rq(tg->cfs_rq[cpu]); - raw_spin_unlock_irqrestore(&rq->lock, flags); + /* + * Only empty task groups can be destroyed; so we can speculatively + * check on_list without danger of it being re-added. + */ + if (!tg->cfs_rq[cpu]->on_list) + continue; + + rq = cpu_rq(cpu); + + raw_spin_lock_irqsave(&rq->lock, flags); + list_del_leaf_cfs_rq(tg->cfs_rq[cpu]); + raw_spin_unlock_irqrestore(&rq->lock, flags); + } } void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, @@ -8302,7 +8307,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) return 1; } -void unregister_fair_sched_group(struct task_group *tg, int cpu) { } +void unregister_fair_sched_group(struct task_group *tg) { } #endif /* CONFIG_FAIR_GROUP_SCHED */ diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 0c9ebd82a684..af8d8c3eb8ab 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -308,7 +308,7 @@ extern int tg_nop(struct task_group *tg, void *data); extern void free_fair_sched_group(struct task_group *tg); extern int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent); -extern void unregister_fair_sched_group(struct task_group *tg, int cpu); +extern void unregister_fair_sched_group(struct task_group *tg); extern void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, struct sched_entity *se, int cpu, struct sched_entity *parent); From c1504091b9805d131abb36a1f01af7a5c99ac9fc Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Wed, 17 Aug 2016 21:58:33 -0400 Subject: [PATCH 105/189] btrfs: don't create or leak aliased root while cleaning up orphans [ Upstream commit 35bbb97fc898aeb874cb7c8b746f091caa359994 ] commit 909c3a22da3 (Btrfs: fix loading of orphan roots leading to BUG_ON) avoids the BUG_ON but can add an aliased root to the dead_roots list or leak the root. Since we've already been loading roots into the radix tree, we should use it before looking the root up on disk. Cc: # 4.5 Signed-off-by: Jeff Mahoney Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Chris Mason Signed-off-by: Sasha Levin --- fs/btrfs/disk-io.c | 4 ++-- fs/btrfs/disk-io.h | 2 ++ fs/btrfs/root-tree.c | 27 ++++++++++++++++++--------- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ae6e3a30e61e..8dbb00fbb00b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1608,8 +1608,8 @@ fail: return ret; } -static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, - u64 root_id) +struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, + u64 root_id) { struct btrfs_root *root; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index adeb31830b9c..3c9819403487 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -68,6 +68,8 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info, struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, struct btrfs_key *location); int btrfs_init_fs_root(struct btrfs_root *root); +struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, + u64 root_id); int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info); diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 2c849b08a91b..6a6efb26d52f 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -272,6 +272,23 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) root_key.objectid = key.offset; key.offset++; + /* + * The root might have been inserted already, as before we look + * for orphan roots, log replay might have happened, which + * triggers a transaction commit and qgroup accounting, which + * in turn reads and inserts fs roots while doing backref + * walking. + */ + root = btrfs_lookup_fs_root(tree_root->fs_info, + root_key.objectid); + if (root) { + WARN_ON(!test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, + &root->state)); + if (btrfs_root_refs(&root->root_item) == 0) + btrfs_add_dead_root(root); + continue; + } + root = btrfs_read_fs_root(tree_root, &root_key); err = PTR_ERR_OR_ZERO(root); if (err && err != -ENOENT) { @@ -310,16 +327,8 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state); err = btrfs_insert_fs_root(root->fs_info, root); - /* - * The root might have been inserted already, as before we look - * for orphan roots, log replay might have happened, which - * triggers a transaction commit and qgroup accounting, which - * in turn reads and inserts fs roots while doing backref - * walking. - */ - if (err == -EEXIST) - err = 0; if (err) { + BUG_ON(err == -EEXIST); btrfs_free_fs_root(root); break; } From 90f9ed93549e68cd5e349d7c3a8ce0983728f1fe Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 25 Jan 2016 17:44:11 +0100 Subject: [PATCH 106/189] thermal: allow spear-thermal driver to be a module [ Upstream commit 4d2f1794c07aae55b8f25f4d8aebcafc0d3e501d ] When the thermal subsystem is a loadable module, the spear driver fails to build: drivers/thermal/built-in.o: In function `spear_thermal_exit': spear_thermal.c:(.text+0xf8): undefined reference to `thermal_zone_device_unregister' drivers/thermal/built-in.o: In function `spear_thermal_probe': spear_thermal.c:(.text+0x230): undefined reference to `thermal_zone_device_register' This changes the symbol to a tristate, so Kconfig can track the dependency correctly. Signed-off-by: Arnd Bergmann Signed-off-by: Eduardo Valentin Signed-off-by: Sasha Levin --- drivers/thermal/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index 4b660b5beb98..b556fa8250fc 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -195,7 +195,7 @@ config IMX_THERMAL passive trip is crossed. config SPEAR_THERMAL - bool "SPEAr thermal sensor driver" + tristate "SPEAr thermal sensor driver" depends on PLAT_SPEAR || COMPILE_TEST depends on OF help From c8a5f83fa43e8464f7bef12a8dc22f4e778bd78b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 25 Jan 2016 17:44:12 +0100 Subject: [PATCH 107/189] thermal: allow u8500-thermal driver to be a module [ Upstream commit 26716ce124fce88f288f07738ef685d5dfe5c13f ] When the thermal subsystem is a loadable module, the u8500 driver fails to build: drivers/thermal/built-in.o: In function `db8500_thermal_probe': db8500_thermal.c:(.text+0x96c): undefined reference to `thermal_zone_device_register' drivers/thermal/built-in.o: In function `db8500_thermal_work': db8500_thermal.c:(.text+0xab4): undefined reference to `thermal_zone_device_update' This changes the symbol to a tristate, so Kconfig can track the dependency correctly. Signed-off-by: Arnd Bergmann Signed-off-by: Eduardo Valentin Signed-off-by: Sasha Levin --- drivers/mfd/db8500-prcmu.c | 3 +++ drivers/thermal/Kconfig | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c index e6e4bacb09ee..12099b09a9a7 100644 --- a/drivers/mfd/db8500-prcmu.c +++ b/drivers/mfd/db8500-prcmu.c @@ -2048,6 +2048,7 @@ int db8500_prcmu_config_hotmon(u8 low, u8 high) return 0; } +EXPORT_SYMBOL_GPL(db8500_prcmu_config_hotmon); static int config_hot_period(u16 val) { @@ -2074,11 +2075,13 @@ int db8500_prcmu_start_temp_sense(u16 cycles32k) return config_hot_period(cycles32k); } +EXPORT_SYMBOL_GPL(db8500_prcmu_start_temp_sense); int db8500_prcmu_stop_temp_sense(void) { return config_hot_period(0xFFFF); } +EXPORT_SYMBOL_GPL(db8500_prcmu_stop_temp_sense); static int prcmu_a9wdog(u8 cmd, u8 d0, u8 d1, u8 d2, u8 d3) { diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index b556fa8250fc..1def65d2f0b5 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -237,8 +237,8 @@ config DOVE_THERMAL framework. config DB8500_THERMAL - bool "DB8500 thermal management" - depends on ARCH_U8500 + tristate "DB8500 thermal management" + depends on MFD_DB8500_PRCMU default y help Adds DB8500 thermal management implementation according to the thermal From c447410b6dfacf132865e79af7bc26fa977f24e5 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Sat, 13 Feb 2016 11:58:16 +0200 Subject: [PATCH 108/189] tpm: fix: return rc when devm_add_action() fails [ Upstream commit 4f3b193dee4423d8c89c9a3e8e05f9197ea459a4 ] Call put_device() and return error code if devm_add_action() fails. Signed-off-by: Jarkko Sakkinen Reported-by: Jason Gunthorpe Fixes: 8e0ee3c9faed ("tpm: fix the cleanup of struct tpm_chip") Signed-off-by: Sasha Levin --- drivers/char/tpm/tpm-chip.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index e759100e41a7..28894878dcd5 100644 --- a/drivers/char/tpm/tpm-chip.c +++ b/drivers/char/tpm/tpm-chip.c @@ -230,7 +230,11 @@ struct tpm_chip *tpmm_chip_alloc(struct device *dev, chip->cdev.owner = dev->driver->owner; chip->cdev.kobj.parent = &chip->dev.kobj; - devm_add_action(dev, (void (*)(void *)) put_device, &chip->dev); + rc = devm_add_action(dev, (void (*)(void *)) put_device, &chip->dev); + if (rc) { + put_device(&chip->dev); + return ERR_PTR(rc); + } return chip; } From 92fe37c0f09c48ba42300d7ac79c0750e48d7a3f Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Wed, 11 May 2016 12:27:15 -0400 Subject: [PATCH 109/189] x86/PCI: Mark Broadwell-EP Home Agent 1 as having non-compliant BARs [ Upstream commit da77b67195de1c65bef4908fa29967c4d0af2da2 ] Commit b894157145e4 ("x86/PCI: Mark Broadwell-EP Home Agent & PCU as having non-compliant BARs") marked Home Agent 0 & PCU has having non-compliant BARs. Home Agent 1 also has non-compliant BARs. Mark Home Agent 1 as having non-compliant BARs so the PCI core doesn't touch them. The problem with these devices is documented in the Xeon v4 specification update: BDF2 PCI BARs in the Home Agent Will Return Non-Zero Values During Enumeration Problem: During system initialization the Operating System may access the standard PCI BARs (Base Address Registers). Due to this erratum, accesses to the Home Agent BAR registers (Bus 1; Device 18; Function 0,4; Offsets (0x14-0x24) will return non-zero values. Implication: The operating system may issue a warning. Intel has not observed any functional failures due to this erratum. Link: http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v4-spec-update.html Fixes: b894157145e4 ("x86/PCI: Mark Broadwell-EP Home Agent & PCU as having non-compliant BARs") Signed-off-by: Prarit Bhargava Signed-off-by: Bjorn Helgaas CC: Thomas Gleixner CC: Ingo Molnar CC: "H. Peter Anvin" CC: Andi Kleen Signed-off-by: Sasha Levin --- arch/x86/pci/fixup.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 0ae7e9fa348d..89f90549c6a8 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -541,9 +541,16 @@ static void twinhead_reserve_killing_zone(struct pci_dev *dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x27B9, twinhead_reserve_killing_zone); +/* + * Broadwell EP Home Agent BARs erroneously return non-zero values when read. + * + * See http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v4-spec-update.html + * entry BDF2. + */ static void pci_bdwep_bar(struct pci_dev *dev) { dev->non_compliant_bars = 1; } +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6f60, pci_bdwep_bar); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fa0, pci_bdwep_bar); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, pci_bdwep_bar); From 93e8e691675a33960b607d9fd0858d5a35f0e419 Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Mon, 25 Apr 2016 23:31:12 -0700 Subject: [PATCH 110/189] aacraid: Start adapter after updating number of MSIX vectors [ Upstream commit 116d77fea02e2a5aded7d29ba4c692774cb339f1 ] The adapter has to be started after updating the number of MSIX Vectors Fixes: ecc479e00db8 (aacraid: Set correct MSIX count for EEH recovery) Cc: stable@vger.kernel.org Signed-off-by: Raghava Aditya Renukunta Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/aacraid/linit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index 8da8b46da722..1c447405ebbf 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -1416,8 +1416,8 @@ static int aac_acquire_resources(struct aac_dev *dev) /* After EEH recovery or suspend resume, max_msix count * may change, therfore updating in init as well. */ - aac_adapter_start(dev); dev->init->Sa_MSIXVectors = cpu_to_le32(dev->max_msix); + aac_adapter_start(dev); } return 0; From 5796c70e5b1d62016bdc3b678346ffbd433d030a Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Mon, 21 Mar 2016 10:02:42 +0200 Subject: [PATCH 111/189] perf/core: Don't leak event in the syscall error path [ Upstream commit 201c2f85bd0bc13b712d9c0b3d11251b182e06ae ] In the error path, event_file not being NULL is used to determine whether the event itself still needs to be free'd, so fix it up to avoid leaking. Reported-by: Leon Yu Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: 130056275ade ("perf: Do not double free") Link: http://lkml.kernel.org/r/87twk06yxp.fsf@ashishki-desk.ger.corp.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- kernel/events/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 330fcd1b1822..19519ec39d51 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8532,6 +8532,7 @@ SYSCALL_DEFINE5(perf_event_open, f_flags); if (IS_ERR(event_file)) { err = PTR_ERR(event_file); + event_file = NULL; goto err_context; } From af521c7f828f49ec73324c47bef073d39e0d93ba Mon Sep 17 00:00:00 2001 From: Vladis Dronov Date: Sun, 31 Jan 2016 14:14:52 -0200 Subject: [PATCH 112/189] usbvision: revert commit 588afcc1 [ Upstream commit d5468d7afaa9c9e961e150f0455a14a9f4872a98 ] Commit 588afcc1c0e4 ("[media] usbvision fix overflow of interfaces array")' should be reverted, because: * "!dev->actconfig->interface[ifnum]" won't catch a case where the value is not NULL but some garbage. This way the system may crash later with GPF. * "(ifnum >= USB_MAXINTERFACES)" does not cover all the error conditions. "ifnum" should be compared to "dev->actconfig-> desc.bNumInterfaces", i.e. compared to the number of "struct usb_interface" kzalloc()-ed, not to USB_MAXINTERFACES. * There is a "struct usb_device" leak in this error path, as there is usb_get_dev(), but no usb_put_dev() on this path. * There is a bug of the same type several lines below with number of endpoints. The code is accessing hard-coded second endpoint ("interface->endpoint[1].desc") which may not exist. It would be great to handle this in the same patch too. * All the concerns above are resolved by already-accepted commit fa52bd50 ("[media] usbvision: fix crash on detecting device with invalid configuration") * Mailing list message: http://www.spinics.net/lists/linux-media/msg94832.html Signed-off-by: Vladis Dronov Signed-off-by: Hans Verkuil Cc: # for v4.5 Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/usb/usbvision/usbvision-video.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/media/usb/usbvision/usbvision-video.c b/drivers/media/usb/usbvision/usbvision-video.c index cafc34938a79..91d709efef7a 100644 --- a/drivers/media/usb/usbvision/usbvision-video.c +++ b/drivers/media/usb/usbvision/usbvision-video.c @@ -1461,13 +1461,6 @@ static int usbvision_probe(struct usb_interface *intf, printk(KERN_INFO "%s: %s found\n", __func__, usbvision_device_data[model].model_string); - /* - * this is a security check. - * an exploit using an incorrect bInterfaceNumber is known - */ - if (ifnum >= USB_MAXINTERFACES || !dev->actconfig->interface[ifnum]) - return -ENODEV; - if (usbvision_device_data[model].interface >= 0) interface = &dev->actconfig->interface[usbvision_device_data[model].interface]->altsetting[0]; else if (ifnum < dev->actconfig->desc.bNumInterfaces) From 1c84d14eb7f4cefe455facae6b1ac06e7841db56 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Fri, 28 Oct 2016 08:21:03 +0100 Subject: [PATCH 113/189] MIPS: Fix FCSR Cause bit handling for correct SIGFPE issue [ Upstream commit 5a1aca4469fdccd5b74ba0b4e490173b2b447895 ] Sanitize FCSR Cause bit handling, following a trail of past attempts: * commit 4249548454f7 ("MIPS: ptrace: Fix FP context restoration FCSR regression"), * commit 443c44032a54 ("MIPS: Always clear FCSR cause bits after emulation"), * commit 64bedffe4968 ("MIPS: Clear [MSA]FPE CSR.Cause after notify_die()"), * commit b1442d39fac2 ("MIPS: Prevent user from setting FCSR cause bits"), * commit b54d2901517d ("Properly handle branch delay slots in connection with signals."). Specifically do not mask these bits out in ptrace(2) processing and send a SIGFPE signal instead whenever a matching pair of an FCSR Cause and Enable bit is seen as execution of an affected context is about to resume. Only then clear Cause bits, and even then do not clear any bits that are set but masked with the respective Enable bits. Adjust Cause bit clearing throughout code likewise, except within the FPU emulator proper where they are set according to IEEE 754 exceptions raised as the operation emulated executed. Do so so that any IEEE 754 exceptions subject to their default handling are recorded like with operations executed by FPU hardware. Signed-off-by: Maciej W. Rozycki Cc: Paul Burton Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14460/ Signed-off-by: Ralf Baechle Signed-off-by: Sasha Levin --- arch/mips/include/asm/fpu_emulator.h | 13 +++++ arch/mips/include/asm/switch_to.h | 18 +++++++ arch/mips/kernel/mips-r2-to-r6-emul.c | 10 ++-- arch/mips/kernel/ptrace.c | 7 ++- arch/mips/kernel/traps.c | 72 +++++++++++++++------------ 5 files changed, 78 insertions(+), 42 deletions(-) diff --git a/arch/mips/include/asm/fpu_emulator.h b/arch/mips/include/asm/fpu_emulator.h index 2f021cdfba4f..742223716fc8 100644 --- a/arch/mips/include/asm/fpu_emulator.h +++ b/arch/mips/include/asm/fpu_emulator.h @@ -66,6 +66,8 @@ extern int do_dsemulret(struct pt_regs *xcp); extern int fpu_emulator_cop1Handler(struct pt_regs *xcp, struct mips_fpu_struct *ctx, int has_fpu, void *__user *fault_addr); +void force_fcr31_sig(unsigned long fcr31, void __user *fault_addr, + struct task_struct *tsk); int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31); int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, @@ -92,4 +94,15 @@ static inline void fpu_emulator_init_fpu(void) set_fpr64(&t->thread.fpu.fpr[i], 0, SIGNALLING_NAN); } +/* + * Mask the FCSR Cause bits according to the Enable bits, observing + * that Unimplemented is always enabled. + */ +static inline unsigned long mask_fcr31_x(unsigned long fcr31) +{ + return fcr31 & (FPU_CSR_UNI_X | + ((fcr31 & FPU_CSR_ALL_E) << + (ffs(FPU_CSR_ALL_X) - ffs(FPU_CSR_ALL_E)))); +} + #endif /* _ASM_FPU_EMULATOR_H */ diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h index ebb5c0f2f90d..c0ae27971e31 100644 --- a/arch/mips/include/asm/switch_to.h +++ b/arch/mips/include/asm/switch_to.h @@ -75,6 +75,22 @@ do { if (cpu_has_rw_llb) { \ } \ } while (0) +/* + * Check FCSR for any unmasked exceptions pending set with `ptrace', + * clear them and send a signal. + */ +#define __sanitize_fcr31(next) \ +do { \ + unsigned long fcr31 = mask_fcr31_x(next->thread.fpu.fcr31); \ + void __user *pc; \ + \ + if (unlikely(fcr31)) { \ + pc = (void __user *)task_pt_regs(next)->cp0_epc; \ + next->thread.fpu.fcr31 &= ~fcr31; \ + force_fcr31_sig(fcr31, pc, next); \ + } \ +} while (0) + /* * For newly created kernel threads switch_to() will return to * ret_from_kernel_thread, newly created user threads to ret_from_fork. @@ -85,6 +101,8 @@ do { if (cpu_has_rw_llb) { \ do { \ __mips_mt_fpaff_switch_to(prev); \ lose_fpu_inatomic(1, prev); \ + if (tsk_used_math(next)) \ + __sanitize_fcr31(next); \ if (cpu_has_dsp) { \ __save_dsp(prev); \ __restore_dsp(next); \ diff --git a/arch/mips/kernel/mips-r2-to-r6-emul.c b/arch/mips/kernel/mips-r2-to-r6-emul.c index cbe0f025856d..7b887027dca2 100644 --- a/arch/mips/kernel/mips-r2-to-r6-emul.c +++ b/arch/mips/kernel/mips-r2-to-r6-emul.c @@ -900,7 +900,7 @@ static inline int mipsr2_find_op_func(struct pt_regs *regs, u32 inst, * mipsr2_decoder: Decode and emulate a MIPS R2 instruction * @regs: Process register set * @inst: Instruction to decode and emulate - * @fcr31: Floating Point Control and Status Register returned + * @fcr31: Floating Point Control and Status Register Cause bits returned */ int mipsr2_decoder(struct pt_regs *regs, u32 inst, unsigned long *fcr31) { @@ -1183,13 +1183,13 @@ fpu_emul: err = fpu_emulator_cop1Handler(regs, ¤t->thread.fpu, 0, &fault_addr); - *fcr31 = current->thread.fpu.fcr31; /* - * We can't allow the emulated instruction to leave any of - * the cause bits set in $fcr31. + * We can't allow the emulated instruction to leave any + * enabled Cause bits set in $fcr31. */ - current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X; + *fcr31 = res = mask_fcr31_x(current->thread.fpu.fcr31); + current->thread.fpu.fcr31 &= ~res; /* * this is a tricky issue - lose_fpu() uses LL/SC atomics diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 5a869515b393..9d04392f7ef0 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -79,16 +79,15 @@ void ptrace_disable(struct task_struct *child) } /* - * Poke at FCSR according to its mask. Don't set the cause bits as - * this is currently not handled correctly in FP context restoration - * and will cause an oops if a corresponding enable bit is set. + * Poke at FCSR according to its mask. Set the Cause bits even + * if a corresponding Enable bit is set. This will be noticed at + * the time the thread is switched to and SIGFPE thrown accordingly. */ static void ptrace_setfcr31(struct task_struct *child, u32 value) { u32 fcr31; u32 mask; - value &= ~FPU_CSR_ALL_X; fcr31 = child->thread.fpu.fcr31; mask = boot_cpu_data.fpu_msk31; child->thread.fpu.fcr31 = (value & ~mask) | (fcr31 & mask); diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 1b901218e3ae..6abd6b41c13d 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -706,6 +706,32 @@ asmlinkage void do_ov(struct pt_regs *regs) exception_exit(prev_state); } +/* + * Send SIGFPE according to FCSR Cause bits, which must have already + * been masked against Enable bits. This is impotant as Inexact can + * happen together with Overflow or Underflow, and `ptrace' can set + * any bits. + */ +void force_fcr31_sig(unsigned long fcr31, void __user *fault_addr, + struct task_struct *tsk) +{ + struct siginfo si = { .si_addr = fault_addr, .si_signo = SIGFPE }; + + if (fcr31 & FPU_CSR_INV_X) + si.si_code = FPE_FLTINV; + else if (fcr31 & FPU_CSR_DIV_X) + si.si_code = FPE_FLTDIV; + else if (fcr31 & FPU_CSR_OVF_X) + si.si_code = FPE_FLTOVF; + else if (fcr31 & FPU_CSR_UDF_X) + si.si_code = FPE_FLTUND; + else if (fcr31 & FPU_CSR_INE_X) + si.si_code = FPE_FLTRES; + else + si.si_code = __SI_FAULT; + force_sig_info(SIGFPE, &si, tsk); +} + int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31) { struct siginfo si = { 0 }; @@ -715,27 +741,7 @@ int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31) return 0; case SIGFPE: - si.si_addr = fault_addr; - si.si_signo = sig; - /* - * Inexact can happen together with Overflow or Underflow. - * Respect the mask to deliver the correct exception. - */ - fcr31 &= (fcr31 & FPU_CSR_ALL_E) << - (ffs(FPU_CSR_ALL_X) - ffs(FPU_CSR_ALL_E)); - if (fcr31 & FPU_CSR_INV_X) - si.si_code = FPE_FLTINV; - else if (fcr31 & FPU_CSR_DIV_X) - si.si_code = FPE_FLTDIV; - else if (fcr31 & FPU_CSR_OVF_X) - si.si_code = FPE_FLTOVF; - else if (fcr31 & FPU_CSR_UDF_X) - si.si_code = FPE_FLTUND; - else if (fcr31 & FPU_CSR_INE_X) - si.si_code = FPE_FLTRES; - else - si.si_code = __SI_FAULT; - force_sig_info(sig, &si, current); + force_fcr31_sig(fcr31, fault_addr, current); return 1; case SIGBUS: @@ -798,13 +804,13 @@ static int simulate_fp(struct pt_regs *regs, unsigned int opcode, /* Run the emulator */ sig = fpu_emulator_cop1Handler(regs, ¤t->thread.fpu, 1, &fault_addr); - fcr31 = current->thread.fpu.fcr31; /* - * We can't allow the emulated instruction to leave any of - * the cause bits set in $fcr31. + * We can't allow the emulated instruction to leave any + * enabled Cause bits set in $fcr31. */ - current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X; + fcr31 = mask_fcr31_x(current->thread.fpu.fcr31); + current->thread.fpu.fcr31 &= ~fcr31; /* Restore the hardware register state */ own_fpu(1); @@ -830,7 +836,7 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31) goto out; /* Clear FCSR.Cause before enabling interrupts */ - write_32bit_cp1_register(CP1_STATUS, fcr31 & ~FPU_CSR_ALL_X); + write_32bit_cp1_register(CP1_STATUS, fcr31 & ~mask_fcr31_x(fcr31)); local_irq_enable(); die_if_kernel("FP exception in kernel code", regs); @@ -852,13 +858,13 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31) /* Run the emulator */ sig = fpu_emulator_cop1Handler(regs, ¤t->thread.fpu, 1, &fault_addr); - fcr31 = current->thread.fpu.fcr31; /* - * We can't allow the emulated instruction to leave any of - * the cause bits set in $fcr31. + * We can't allow the emulated instruction to leave any + * enabled Cause bits set in $fcr31. */ - current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X; + fcr31 = mask_fcr31_x(current->thread.fpu.fcr31); + current->thread.fpu.fcr31 &= ~fcr31; /* Restore the hardware register state */ own_fpu(1); /* Using the FPU again. */ @@ -1431,13 +1437,13 @@ asmlinkage void do_cpu(struct pt_regs *regs) sig = fpu_emulator_cop1Handler(regs, ¤t->thread.fpu, 0, &fault_addr); - fcr31 = current->thread.fpu.fcr31; /* * We can't allow the emulated instruction to leave - * any of the cause bits set in $fcr31. + * any enabled Cause bits set in $fcr31. */ - current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X; + fcr31 = mask_fcr31_x(current->thread.fpu.fcr31); + current->thread.fpu.fcr31 &= ~fcr31; /* Send a signal if required. */ if (!process_fpemu_return(sig, fault_addr, fcr31) && !err) From f629104427a6dc9ab20dc10424896777a2bbe506 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 16 Jun 2016 14:34:30 +0200 Subject: [PATCH 114/189] ASoC: ak4613: Enable cache usage to fix crashes on resume [ Upstream commit dcd2d1f78664fdc75eadaaf65257834e24383d01 ] During system resume: kernel BUG at drivers/base/regmap/regcache.c:347! ... PC is at regcache_sync+0x1c/0x128 LR is at ak4613_resume+0x28/0x34 The ak4613 driver is using a regmap cache sync to restore the configuration of the chip on resume but does not actually define a register cache which means that the resume is never going to work and we trigger asserts in regmap. Fix this by enabling caching. Based on commit d3030d11961a8c10 ("ASoC: ak4642: Enable cache usage to fix crashes on resume") by Mark Brown . Signed-off-by: Geert Uytterhoeven Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/ak4613.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/ak4613.c b/sound/soc/codecs/ak4613.c index 07a266460ec3..b4b36cc92ffe 100644 --- a/sound/soc/codecs/ak4613.c +++ b/sound/soc/codecs/ak4613.c @@ -143,6 +143,7 @@ static const struct regmap_config ak4613_regmap_cfg = { .max_register = 0x16, .reg_defaults = ak4613_reg, .num_reg_defaults = ARRAY_SIZE(ak4613_reg), + .cache_type = REGCACHE_RBTREE, }; static const struct of_device_id ak4613_of_match[] = { From 796c9c018b9a11399c3b7fb54f4d37485ad9fc6e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 16 Jun 2016 14:34:32 +0200 Subject: [PATCH 115/189] ASoC: wm8940: Enable cache usage to fix crashes on resume [ Upstream commit 50c7a0ef2d97e56c7ce2f1ea5fe1d8e25aadc1bb ] The wm8940 driver is using a regmap cache sync to restore the configuration of the chip when switching from OFF to STANDBY, but does not actually define a register cache which means that the restore is never going to work and we trigger asserts in regmap. Fix this by enabling caching. Based on commit d3030d11961a8c10 ("ASoC: ak4642: Enable cache usage to fix crashes on resume") by Mark Brown . Signed-off-by: Geert Uytterhoeven Acked-by: Charles Keepax Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wm8940.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/wm8940.c b/sound/soc/codecs/wm8940.c index f6f9395ea38e..1c600819f768 100644 --- a/sound/soc/codecs/wm8940.c +++ b/sound/soc/codecs/wm8940.c @@ -743,6 +743,7 @@ static const struct regmap_config wm8940_regmap = { .max_register = WM8940_MONOMIX, .reg_defaults = wm8940_reg_defaults, .num_reg_defaults = ARRAY_SIZE(wm8940_reg_defaults), + .cache_type = REGCACHE_RBTREE, .readable_reg = wm8940_readable_register, .volatile_reg = wm8940_volatile_register, From 22979776093abc47dcbc6a06eab41d24800ec32d Mon Sep 17 00:00:00 2001 From: Mark Syms Date: Tue, 29 Nov 2016 11:36:46 +0000 Subject: [PATCH 116/189] CIFS: handle guest access errors to Windows shares [ Upstream commit 40920c2bb119fd49ba03e2f97a172171781be442 ] Commit 1a967d6c9b39c226be1b45f13acd4d8a5ab3dc44 ("correctly to anonymous authentication for the NTLM(v2) authentication") introduces a regression in handling errors related to attempting a guest connection to a Windows share which requires authentication. This should result in a permission denied error but actually causes the kernel module to enter a never-ending loop trying to follow a DFS referal which doesn't exist. The base cause of this is the failure now occurs later in the process during tree connect and not at the session setup setup and all errors in tree connect are interpreted as needing to follow the DFS paths which isn't in this case correct. So, check the returned error against EACCES and fail if this is returned error. Feedback from Aurelien: PS> net user guest /activate:no PS> mkdir C:\guestshare PS> icacls C:\guestshare /grant 'Everyone:(OI)(CI)F' PS> new-smbshare -name guestshare -path C:\guestshare -fullaccess Everyone I've tested v3.10, v4.4, master, master+your patch using default options (empty or no user "NU") and user=abc (U). NT_LOGON_FAILURE in session setup: LF This is what you seem to have in 3.10. NT_ACCESS_DENIED in tree connect to the share: AD This is what you get before your infinite loop. | NU U -------------------------------- 3.10 | LF LF 4.4 | LF LF master | AD LF master+patch | AD LF No infinite DFS loop :( All these issues result in mount failing very fast with permission denied. I guess it could be from either the Windows version or the share/folder ACL. A deeper analysis of the packets might reveal more. In any case I did not notice any issues for on a basic DFS setup with the patch so I don't think it introduced any regressions, which is probably all that matters. It still bothers me a little I couldn't hit the bug. I've included kernel output w/ debugging output and network capture of my tests if anyone want to have a look at it. (master+patch = ml-guestfix). Signed-off-by: Mark Syms Reviewed-by: Aurelien Aptel Tested-by: Aurelien Aptel Acked-by: Pavel Shilovsky Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/connect.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 077ad3a06c9a..1eeb4780c3ed 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3674,6 +3674,9 @@ try_mount_again: if (IS_ERR(tcon)) { rc = PTR_ERR(tcon); tcon = NULL; + if (rc == -EACCES) + goto mount_fail_check; + goto remote_path_check; } From db769a03103b7cd1d419c76d3840e2d3a64c0df8 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 25 Jul 2017 14:53:03 +0100 Subject: [PATCH 117/189] arm64: Fix potential race with hardware DBM in ptep_set_access_flags() [ Upstream commit 6d332747fa5f0a6843b56b5b129168ba909336d1 ] In a system with DBM (dirty bit management) capable agents there is a possible race between a CPU executing ptep_set_access_flags() (maybe non-DBM capable) and a hardware update of the dirty state (clearing of PTE_RDONLY). The scenario: a) the pte is writable (PTE_WRITE set), clean (PTE_RDONLY set) and old (PTE_AF clear) b) ptep_set_access_flags() is called as a result of a read access and it needs to set the pte to writable, clean and young (PTE_AF set) c) a DBM-capable agent, as a result of a different write access, is marking the entry as young (setting PTE_AF) and dirty (clearing PTE_RDONLY) The current ptep_set_access_flags() implementation would set the PTE_RDONLY bit in the resulting value overriding the DBM update and losing the dirty state. This patch fixes such race by setting PTE_RDONLY to the most permissive (lowest value) of the current entry and the new one. Fixes: 66dbd6e61a52 ("arm64: Implement ptep_set_access_flags() for hardware AF/DBM") Cc: Will Deacon Acked-by: Mark Rutland Acked-by: Steve Capper Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- arch/arm64/mm/fault.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 86485415c5f0..be7f8416809f 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -107,26 +107,27 @@ int ptep_set_access_flags(struct vm_area_struct *vma, /* only preserve the access flags and write permission */ pte_val(entry) &= PTE_AF | PTE_WRITE | PTE_DIRTY; - /* - * PTE_RDONLY is cleared by default in the asm below, so set it in - * back if necessary (read-only or clean PTE). - */ + /* set PTE_RDONLY if actual read-only or clean PTE */ if (!pte_write(entry) || !pte_sw_dirty(entry)) pte_val(entry) |= PTE_RDONLY; /* * Setting the flags must be done atomically to avoid racing with the - * hardware update of the access/dirty state. + * hardware update of the access/dirty state. The PTE_RDONLY bit must + * be set to the most permissive (lowest value) of *ptep and entry + * (calculated as: a & b == ~(~a | ~b)). */ + pte_val(entry) ^= PTE_RDONLY; asm volatile("// ptep_set_access_flags\n" " prfm pstl1strm, %2\n" "1: ldxr %0, %2\n" - " and %0, %0, %3 // clear PTE_RDONLY\n" + " eor %0, %0, %3 // negate PTE_RDONLY in *ptep\n" " orr %0, %0, %4 // set flags\n" + " eor %0, %0, %3 // negate final PTE_RDONLY\n" " stxr %w1, %0, %2\n" " cbnz %w1, 1b\n" : "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)) - : "L" (~PTE_RDONLY), "r" (pte_val(entry))); + : "L" (PTE_RDONLY), "r" (pte_val(entry))); flush_tlb_fix_spurious_fault(vma, address); return 1; From 9e9fe58a92a46c6d154d2901735bf230d91b8507 Mon Sep 17 00:00:00 2001 From: Jonathan Basseri Date: Wed, 25 Oct 2017 09:52:27 -0700 Subject: [PATCH 118/189] xfrm: Clear sk_dst_cache when applying per-socket policy. [ Upstream commit 2b06cdf3e688b98fcc9945873b5d42792bd4eee0 ] If a socket has a valid dst cache, then xfrm_lookup_route will get skipped. However, the cache is not invalidated when applying policy to a socket (i.e. IPV6_XFRM_POLICY). The result is that new policies are sometimes ignored on those sockets. (Note: This was broken for IPv4 and IPv6 at different times.) This can be demonstrated like so, 1. Create UDP socket. 2. connect() the socket. 3. Apply an outbound XFRM policy to the socket. (setsockopt) 4. send() data on the socket. Packets will continue to be sent in the clear instead of matching an xfrm or returning a no-match error (EAGAIN). This affects calls to send() and not sendto(). Invalidating the sk_dst_cache is necessary to correctly apply xfrm policies. Since we do this in xfrm_user_policy(), the sk_lock was already acquired in either do_ip_setsockopt() or do_ipv6_setsockopt(), and we may call __sk_dst_reset(). Performance impact should be negligible, since this code is only called when changing xfrm policy, and only affects the socket in question. Fixes: 00bc0ef5880d ("ipv6: Skip XFRM lookup if dst_entry in socket cache is valid") Tested: https://android-review.googlesource.com/517555 Tested: https://android-review.googlesource.com/418659 Signed-off-by: Jonathan Basseri Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/xfrm/xfrm_state.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index d6a11af0bab1..9b6e51450fc5 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1884,6 +1884,7 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen if (err >= 0) { xfrm_sk_policy_insert(sk, err, pol); xfrm_pol_put(pol); + __sk_dst_reset(sk); err = 0; } From d896b9f298afcfc4873000a290324ba72b23eef5 Mon Sep 17 00:00:00 2001 From: "Ewan D. Milne" Date: Tue, 27 Jun 2017 14:55:58 -0400 Subject: [PATCH 119/189] scsi: Add STARGET_CREATED_REMOVE state to scsi_target_state [ Upstream commit f9279c968c257ee39b0d7bd2571a4d231a67bcc1 ] The addition of the STARGET_REMOVE state had the side effect of introducing a race condition that can cause a crash. scsi_target_reap_ref_release() checks the starget->state to see if it still in STARGET_CREATED, and if so, skips calling transport_remove_device() and device_del(), because the starget->state is only set to STARGET_RUNNING after scsi_target_add() has called device_add() and transport_add_device(). However, if an rport loss occurs while a target is being scanned, it can happen that scsi_remove_target() will be called while the starget is still in the STARGET_CREATED state. In this case, the starget->state will be set to STARGET_REMOVE, and as a result, scsi_target_reap_ref_release() will take the wrong path. The end result is a panic: [ 1255.356653] Oops: 0000 [#1] SMP [ 1255.360154] Modules linked in: x86_pkg_temp_thermal kvm_intel kvm irqbypass crc32c_intel ghash_clmulni_i [ 1255.393234] CPU: 5 PID: 149 Comm: kworker/u96:4 Tainted: G W 4.11.0+ #8 [ 1255.401879] Hardware name: Dell Inc. PowerEdge R320/08VT7V, BIOS 2.0.22 11/19/2013 [ 1255.410327] Workqueue: scsi_wq_6 fc_scsi_scan_rport [scsi_transport_fc] [ 1255.417720] task: ffff88060ca8c8c0 task.stack: ffffc900048a8000 [ 1255.424331] RIP: 0010:kernfs_find_ns+0x13/0xc0 [ 1255.429287] RSP: 0018:ffffc900048abbf0 EFLAGS: 00010246 [ 1255.435123] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 [ 1255.443083] RDX: 0000000000000000 RSI: ffffffff8188d659 RDI: 0000000000000000 [ 1255.451043] RBP: ffffc900048abc10 R08: 0000000000000000 R09: 0000012433fe0025 [ 1255.459005] R10: 0000000025e5a4b5 R11: 0000000025e5a4b5 R12: ffffffff8188d659 [ 1255.466972] R13: 0000000000000000 R14: ffff8805f55e5088 R15: 0000000000000000 [ 1255.474931] FS: 0000000000000000(0000) GS:ffff880616b40000(0000) knlGS:0000000000000000 [ 1255.483959] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1255.490370] CR2: 0000000000000068 CR3: 0000000001c09000 CR4: 00000000000406e0 [ 1255.498332] Call Trace: [ 1255.501058] kernfs_find_and_get_ns+0x31/0x60 [ 1255.505916] sysfs_unmerge_group+0x1d/0x60 [ 1255.510498] dpm_sysfs_remove+0x22/0x60 [ 1255.514783] device_del+0xf4/0x2e0 [ 1255.518577] ? device_remove_file+0x19/0x20 [ 1255.523241] attribute_container_class_device_del+0x1a/0x20 [ 1255.529457] transport_remove_classdev+0x4e/0x60 [ 1255.534607] ? transport_add_class_device+0x40/0x40 [ 1255.540046] attribute_container_device_trigger+0xb0/0xc0 [ 1255.546069] transport_remove_device+0x15/0x20 [ 1255.551025] scsi_target_reap_ref_release+0x25/0x40 [ 1255.556467] scsi_target_reap+0x2e/0x40 [ 1255.560744] __scsi_scan_target+0xaa/0x5b0 [ 1255.565312] scsi_scan_target+0xec/0x100 [ 1255.569689] fc_scsi_scan_rport+0xb1/0xc0 [scsi_transport_fc] [ 1255.576099] process_one_work+0x14b/0x390 [ 1255.580569] worker_thread+0x4b/0x390 [ 1255.584651] kthread+0x109/0x140 [ 1255.588251] ? rescuer_thread+0x330/0x330 [ 1255.592730] ? kthread_park+0x60/0x60 [ 1255.596815] ret_from_fork+0x29/0x40 [ 1255.600801] Code: 24 08 48 83 42 40 01 5b 41 5c 5d c3 66 66 66 2e 0f 1f 84 00 00 00 00 00 66 66 66 66 90 [ 1255.621876] RIP: kernfs_find_ns+0x13/0xc0 RSP: ffffc900048abbf0 [ 1255.628479] CR2: 0000000000000068 [ 1255.632756] ---[ end trace 34a69ba0477d036f ]--- Fix this by adding another scsi_target state STARGET_CREATED_REMOVE to distinguish this case. Fixes: f05795d3d771 ("scsi: Add intermediate STARGET_REMOVE state to scsi_target_state") Reported-by: David Jeffery Signed-off-by: Ewan D. Milne Cc: Reviewed-by: Laurence Oberman Tested-by: Laurence Oberman Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/scsi_scan.c | 5 +++-- drivers/scsi/scsi_sysfs.c | 8 ++++++-- include/scsi/scsi_device.h | 1 + 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 692445bcca6f..850ddc5fac04 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -381,11 +381,12 @@ static void scsi_target_reap_ref_release(struct kref *kref) = container_of(kref, struct scsi_target, reap_ref); /* - * if we get here and the target is still in the CREATED state that + * if we get here and the target is still in a CREATED state that * means it was allocated but never made visible (because a scan * turned up no LUNs), so don't call device_del() on it. */ - if (starget->state != STARGET_CREATED) { + if ((starget->state != STARGET_CREATED) && + (starget->state != STARGET_CREATED_REMOVE)) { transport_remove_device(&starget->dev); device_del(&starget->dev); } diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 8db0c48943d6..085e470d1c49 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -1212,11 +1212,15 @@ restart: spin_lock_irqsave(shost->host_lock, flags); list_for_each_entry(starget, &shost->__targets, siblings) { if (starget->state == STARGET_DEL || - starget->state == STARGET_REMOVE) + starget->state == STARGET_REMOVE || + starget->state == STARGET_CREATED_REMOVE) continue; if (starget->dev.parent == dev || &starget->dev == dev) { kref_get(&starget->reap_ref); - starget->state = STARGET_REMOVE; + if (starget->state == STARGET_CREATED) + starget->state = STARGET_CREATED_REMOVE; + else + starget->state = STARGET_REMOVE; spin_unlock_irqrestore(shost->host_lock, flags); __scsi_remove_target(starget); scsi_target_reap(starget); diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 293b9a7f53bc..fb53a94a5e8b 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -240,6 +240,7 @@ enum scsi_target_state { STARGET_CREATED = 1, STARGET_RUNNING, STARGET_REMOVE, + STARGET_CREATED_REMOVE, STARGET_DEL, }; From c300313d484db2511f3496bdca96b1e902462839 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Mon, 11 Apr 2016 17:57:05 -0700 Subject: [PATCH 120/189] sparc/pci: Refactor dev_archdata initialization into pci_init_dev_archdata [ Upstream commit 9a78d4fc28904785ffe4c2d361e25b251b479704 ] The function pcibios_add_device() added by commit d0c31e020057 ("sparc/PCI: Fix for panic while enabling SR-IOV") initializes the dev_archdata by doing a memcpy from the PF. This has the problem that it erroneously copies the OF device without explicitly refcounting it. As David Miller pointed out: "Generally speaking we don't really support hot-plug for OF probed devices, but if we did all of the device tree pointers have to be refcounted properly." To fix this error, and also avoid code duplication, this patch creates a new helper function, pci_init_dev_archdata(), that initializes the fields in dev_archdata, and can be invoked by callers after they have taken the needed refcounts Signed-off-by: Sowmini Varadhan Tested-by: Babu Moger Reviewed-by: Khalid Aziz Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- arch/sparc/kernel/pci.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index 9f9614df9e1e..c2b202d763a1 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -245,6 +245,18 @@ static void pci_parse_of_addrs(struct platform_device *op, } } +static void pci_init_dev_archdata(struct dev_archdata *sd, void *iommu, + void *stc, void *host_controller, + struct platform_device *op, + int numa_node) +{ + sd->iommu = iommu; + sd->stc = stc; + sd->host_controller = host_controller; + sd->op = op; + sd->numa_node = numa_node; +} + static struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm, struct device_node *node, struct pci_bus *bus, int devfn) @@ -259,13 +271,10 @@ static struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm, if (!dev) return NULL; + op = of_find_device_by_node(node); sd = &dev->dev.archdata; - sd->iommu = pbm->iommu; - sd->stc = &pbm->stc; - sd->host_controller = pbm; - sd->op = op = of_find_device_by_node(node); - sd->numa_node = pbm->numa_node; - + pci_init_dev_archdata(sd, pbm->iommu, &pbm->stc, pbm, op, + pbm->numa_node); sd = &op->dev.archdata; sd->iommu = pbm->iommu; sd->stc = &pbm->stc; @@ -1003,9 +1012,13 @@ int pcibios_add_device(struct pci_dev *dev) * Copy dev_archdata from PF to VF */ if (dev->is_virtfn) { + struct dev_archdata *psd; + pdev = dev->physfn; - memcpy(&dev->dev.archdata, &pdev->dev.archdata, - sizeof(struct dev_archdata)); + psd = &pdev->dev.archdata; + pci_init_dev_archdata(&dev->dev.archdata, psd->iommu, + psd->stc, psd->host_controller, NULL, + psd->numa_node); } return 0; } From ab0b3b9dbf559a5633d460e748144697bd2d3aa3 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 1 Jun 2016 16:15:18 -0700 Subject: [PATCH 121/189] sch_red: update backlog as well [ Upstream commit d7f4f332f082c4d4ba53582f902ed6b44fd6f45e ] Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/sched/sch_red.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 0505b8408c8b..4bf2b599ef98 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -97,6 +97,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch) ret = qdisc_enqueue(skb, child); if (likely(ret == NET_XMIT_SUCCESS)) { + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; } else if (net_xmit_drop_count(ret)) { q->stats.pdrop++; @@ -118,6 +119,7 @@ static struct sk_buff *red_dequeue(struct Qdisc *sch) skb = child->dequeue(child); if (skb) { qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; } else { if (!red_is_idling(&q->vars)) @@ -143,6 +145,7 @@ static unsigned int red_drop(struct Qdisc *sch) if (child->ops->drop && (len = child->ops->drop(child)) > 0) { q->stats.other++; qdisc_qstats_drop(sch); + sch->qstats.backlog -= len; sch->q.qlen--; return len; } @@ -158,6 +161,7 @@ static void red_reset(struct Qdisc *sch) struct red_sched_data *q = qdisc_priv(sch); qdisc_reset(q->qdisc); + sch->qstats.backlog = 0; sch->q.qlen = 0; red_restart(&q->vars); } From 75c8542375a1ed286ad617badf24408ce376de80 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Thu, 21 Sep 2017 16:02:05 -0400 Subject: [PATCH 122/189] usb-storage: fix bogus hardware error messages for ATA pass-thru devices [ Upstream commit a4fd4a724d6c30ad671046d83be2e9be2f11d275 ] Ever since commit a621bac3044e ("scsi_lib: correctly retry failed zero length REQ_TYPE_FS commands"), people have been getting bogus error messages for USB disk drives using ATA pass-thru. For example: [ 1344.880193] sd 6:0:0:0: [sdb] Attached SCSI disk [ 1345.069152] sd 6:0:0:0: [sdb] tag#0 FAILED Result: hostbyte=DID_ERROR driverbyte=DRIVER_SENSE [ 1345.069159] sd 6:0:0:0: [sdb] tag#0 Sense Key : Hardware Error [current] [descriptor] [ 1345.069162] sd 6:0:0:0: [sdb] tag#0 Add. Sense: No additional sense information [ 1345.069168] sd 6:0:0:0: [sdb] tag#0 CDB: ATA command pass through(16) 85 06 20 00 00 00 00 00 00 00 00 00 00 00 e5 00 [ 1345.172252] sd 6:0:0:0: [sdb] tag#0 FAILED Result: hostbyte=DID_ERROR driverbyte=DRIVER_SENSE [ 1345.172258] sd 6:0:0:0: [sdb] tag#0 Sense Key : Hardware Error [current] [descriptor] [ 1345.172261] sd 6:0:0:0: [sdb] tag#0 Add. Sense: No additional sense information [ 1345.172266] sd 6:0:0:0: [sdb] tag#0 CDB: ATA command pass through(12)/Blank a1 06 20 da 00 00 4f c2 00 b0 00 00 These messages can be quite annoying, because programs like udisks2 provoke them every 10 minutes or so. Other programs can also have this effect, such as those in smartmontools. I don't fully understand how that commit induced the SCSI core to log these error messages, but the underlying cause for them is code added to usb-storage by commit f1a0743bc0e7 ("USB: storage: When a device returns no sense data, call it a Hardware Error"). At the time it was necessary to do this, in order to prevent an infinite retry loop with some not-so-great mass storage devices. However, the ATA pass-thru protocol uses SCSI sense data to return command status values, and some devices always report Check Condition status for ATA pass-thru commands to ensure that the host retrieves the sense data, even if the command succeeded. This violates the USB mass-storage protocol (Check Condition status is supposed to mean the command failed), but we can't help that. This patch attempts to mitigate the problem of these bogus error reports by changing usb-storage. The HARDWARE ERROR sense key will be inserted only for commands that aren't ATA pass-thru. Thanks to Ewan Milne for pointing out that this mechanism was present in usb-storage. 8 years after writing it, I had completely forgotten its existence. Signed-off-by: Alan Stern Tested-by: Kris Lindgren Ref: https://bugzilla.redhat.com/show_bug.cgi?id=1351305 CC: Ewan D. Milne CC: Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/storage/transport.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/usb/storage/transport.c b/drivers/usb/storage/transport.c index 02f86dd1a340..90a7bffe3484 100644 --- a/drivers/usb/storage/transport.c +++ b/drivers/usb/storage/transport.c @@ -808,12 +808,24 @@ Retry_Sense: if (result == USB_STOR_TRANSPORT_GOOD) { srb->result = SAM_STAT_GOOD; srb->sense_buffer[0] = 0x0; + } + + /* + * ATA-passthru commands use sense data to report + * the command completion status, and often devices + * return Check Condition status when nothing is + * wrong. + */ + else if (srb->cmnd[0] == ATA_16 || + srb->cmnd[0] == ATA_12) { + /* leave the data alone */ + } /* If there was a problem, report an unspecified * hardware error to prevent the higher layers from * entering an infinite retry loop. */ - } else { + else { srb->result = DID_ERROR << 16; if ((sshdr.response_code & 0x72) == 0x72) srb->sense_buffer[1] = HARDWARE_ERROR; From e25dc63aa366fd0f61d1d9ba67b66f5d75fc4372 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 30 Jun 2016 17:24:43 +0200 Subject: [PATCH 123/189] bpf: generally move prog destruction to RCU deferral [ Upstream commit 1aacde3d22c42281236155c1ef6d7a5aa32a826b ] Jann Horn reported following analysis that could potentially result in a very hard to trigger (if not impossible) UAF race, to quote his event timeline: - Set up a process with threads T1, T2 and T3 - Let T1 set up a socket filter F1 that invokes another filter F2 through a BPF map [tail call] - Let T1 trigger the socket filter via a unix domain socket write, don't wait for completion - Let T2 call PERF_EVENT_IOC_SET_BPF with F2, don't wait for completion - Now T2 should be behind bpf_prog_get(), but before bpf_prog_put() - Let T3 close the file descriptor for F2, dropping the reference count of F2 to 2 - At this point, T1 should have looked up F2 from the map, but not finished executing it - Let T3 remove F2 from the BPF map, dropping the reference count of F2 to 1 - Now T2 should call bpf_prog_put() (wrong BPF program type), dropping the reference count of F2 to 0 and scheduling bpf_prog_free_deferred() via schedule_work() - At this point, the BPF program could be freed - BPF execution is still running in a freed BPF program While at PERF_EVENT_IOC_SET_BPF time it's only guaranteed that the perf event fd we're doing the syscall on doesn't disappear from underneath us for whole syscall time, it may not be the case for the bpf fd used as an argument only after we did the put. It needs to be a valid fd pointing to a BPF program at the time of the call to make the bpf_prog_get() and while T2 gets preempted, F2 must have dropped reference to 1 on the other CPU. The fput() from the close() in T3 should also add additionally delay to the reference drop via exit_task_work() when bpf_prog_release() gets called as well as scheduling bpf_prog_free_deferred(). That said, it makes nevertheless sense to move the BPF prog destruction generally after RCU grace period to guarantee that such scenario above, but also others as recently fixed in ceb56070359b ("bpf, perf: delay release of BPF prog after grace period") with regards to tail calls won't happen. Integrating bpf_prog_free_deferred() directly into the RCU callback is not allowed since the invocation might happen from either softirq or process context, so we're not permitted to block. Reviewing all bpf_prog_put() invocations from eBPF side (note, cBPF -> eBPF progs don't use this for their destruction) with call_rcu() look good to me. Since we don't know whether at the time of attaching the program, we're already part of a tail call map, we need to use RCU variant. However, due to this, there won't be severely more stress on the RCU callback queue: situations with above bpf_prog_get() and bpf_prog_put() combo in practice normally won't lead to releases, but even if they would, enough effort/ cycles have to be put into loading a BPF program into the kernel already. Reported-by: Jann Horn Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/linux/bpf.h | 5 ----- kernel/bpf/arraymap.c | 4 +--- kernel/bpf/syscall.c | 13 +++---------- kernel/events/core.c | 2 +- 4 files changed, 5 insertions(+), 19 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 132585a7fbd8..bae3da5bcda0 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -177,7 +177,6 @@ void bpf_register_map_type(struct bpf_map_type_list *tl); struct bpf_prog *bpf_prog_get(u32 ufd); struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); -void bpf_prog_put_rcu(struct bpf_prog *prog); struct bpf_map *bpf_map_get_with_uref(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); @@ -208,10 +207,6 @@ static inline struct bpf_prog *bpf_prog_get(u32 ufd) static inline void bpf_prog_put(struct bpf_prog *prog) { } - -static inline void bpf_prog_put_rcu(struct bpf_prog *prog) -{ -} #endif /* CONFIG_BPF_SYSCALL */ /* verifier prototypes for helper functions called from eBPF programs */ diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 0eb11b4ac4c7..daa4e0782cf7 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -270,9 +270,7 @@ static void *prog_fd_array_get_ptr(struct bpf_map *map, int fd) static void prog_fd_array_put_ptr(void *ptr) { - struct bpf_prog *prog = ptr; - - bpf_prog_put_rcu(prog); + bpf_prog_put(ptr); } /* decrement refcnt of all bpf_progs that are stored in this map */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 4b9bbfe764e8..04fc1022ad9f 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -487,7 +487,7 @@ static void bpf_prog_uncharge_memlock(struct bpf_prog *prog) free_uid(user); } -static void __prog_put_common(struct rcu_head *rcu) +static void __bpf_prog_put_rcu(struct rcu_head *rcu) { struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu); @@ -496,17 +496,10 @@ static void __prog_put_common(struct rcu_head *rcu) bpf_prog_free(aux->prog); } -/* version of bpf_prog_put() that is called after a grace period */ -void bpf_prog_put_rcu(struct bpf_prog *prog) -{ - if (atomic_dec_and_test(&prog->aux->refcnt)) - call_rcu(&prog->aux->rcu, __prog_put_common); -} - void bpf_prog_put(struct bpf_prog *prog) { if (atomic_dec_and_test(&prog->aux->refcnt)) - __prog_put_common(&prog->aux->rcu); + call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); } EXPORT_SYMBOL_GPL(bpf_prog_put); @@ -514,7 +507,7 @@ static int bpf_prog_release(struct inode *inode, struct file *filp) { struct bpf_prog *prog = filp->private_data; - bpf_prog_put_rcu(prog); + bpf_prog_put(prog); return 0; } diff --git a/kernel/events/core.c b/kernel/events/core.c index 19519ec39d51..e53dfb5b826e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7141,7 +7141,7 @@ static void perf_event_free_bpf_prog(struct perf_event *event) prog = event->tp_event->prog; if (prog && event->tp_event->bpf_prog_owner == event) { event->tp_event->prog = NULL; - bpf_prog_put_rcu(prog); + bpf_prog_put(prog); } } From 4a3948cb6da454f21f89a34e694cea322ea559f9 Mon Sep 17 00:00:00 2001 From: Pavel Roskin Date: Sat, 23 Sep 2017 13:10:33 -0700 Subject: [PATCH 124/189] drm/nouveau/fbcon: fix oops without fbdev emulation [ Upstream commit 4813766325374af6ed0b66879ba6a0bbb05c83b6 ] This is similar to an earlier commit 52dfcc5ccfbb ("drm/nouveau: fix for disabled fbdev emulation"), but protects all occurrences of helper.fbdev in the source. I see oops in nouveau_fbcon_accel_save_disable() called from nouveau_fbcon_set_suspend_work() on Linux 3.13 when CONFIG_DRM_FBDEV_EMULATION option is disabled. Signed-off-by: Pavel Roskin Reviewed-by: Daniel Vetter Signed-off-by: Ben Skeggs Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nouveau_fbcon.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c index e40a1b07a014..343476d15726 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c @@ -235,7 +235,7 @@ void nouveau_fbcon_accel_save_disable(struct drm_device *dev) { struct nouveau_drm *drm = nouveau_drm(dev); - if (drm->fbcon) { + if (drm->fbcon && drm->fbcon->helper.fbdev) { drm->fbcon->saved_flags = drm->fbcon->helper.fbdev->flags; drm->fbcon->helper.fbdev->flags |= FBINFO_HWACCEL_DISABLED; } @@ -245,7 +245,7 @@ void nouveau_fbcon_accel_restore(struct drm_device *dev) { struct nouveau_drm *drm = nouveau_drm(dev); - if (drm->fbcon) { + if (drm->fbcon && drm->fbcon->helper.fbdev) { drm->fbcon->helper.fbdev->flags = drm->fbcon->saved_flags; } } @@ -257,7 +257,8 @@ nouveau_fbcon_accel_fini(struct drm_device *dev) struct nouveau_fbdev *fbcon = drm->fbcon; if (fbcon && drm->channel) { console_lock(); - fbcon->helper.fbdev->flags |= FBINFO_HWACCEL_DISABLED; + if (fbcon->helper.fbdev) + fbcon->helper.fbdev->flags |= FBINFO_HWACCEL_DISABLED; console_unlock(); nouveau_channel_idle(drm->channel); nvif_object_fini(&fbcon->twod); From 1b6a863ff29ccd7b65b0112bbec1cda26b7f9c4e Mon Sep 17 00:00:00 2001 From: Ashish Samant Date: Wed, 12 Jul 2017 19:26:58 -0700 Subject: [PATCH 125/189] fuse: Dont call set_page_dirty_lock() for ITER_BVEC pages for async_dio [ Upstream commit 61c12b49e1c9c77d7a1bcc161de540d0fd21cf0c ] Commit 8fba54aebbdf ("fuse: direct-io: don't dirty ITER_BVEC pages") fixes the ITER_BVEC page deadlock for direct io in fuse by checking in fuse_direct_io(), whether the page is a bvec page or not, before locking it. However, this check is missed when the "async_dio" mount option is enabled. In this case, set_page_dirty_lock() is called from the req->end callback in request_end(), when the fuse thread is returning from userspace to respond to the read request. This will cause the same deadlock because the bvec condition is not checked in this path. Here is the stack of the deadlocked thread, while returning from userspace: [13706.656686] INFO: task glusterfs:3006 blocked for more than 120 seconds. [13706.657808] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [13706.658788] glusterfs D ffffffff816c80f0 0 3006 1 0x00000080 [13706.658797] ffff8800d6713a58 0000000000000086 ffff8800d9ad7000 ffff8800d9ad5400 [13706.658799] ffff88011ffd5cc0 ffff8800d6710008 ffff88011fd176c0 7fffffffffffffff [13706.658801] 0000000000000002 ffffffff816c80f0 ffff8800d6713a78 ffffffff816c790e [13706.658803] Call Trace: [13706.658809] [] ? bit_wait_io_timeout+0x80/0x80 [13706.658811] [] schedule+0x3e/0x90 [13706.658813] [] schedule_timeout+0x1b5/0x210 [13706.658816] [] ? gup_pud_range+0x1db/0x1f0 [13706.658817] [] ? kvm_clock_read+0x1e/0x20 [13706.658819] [] ? kvm_clock_get_cycles+0x9/0x10 [13706.658822] [] ? ktime_get+0x52/0xc0 [13706.658824] [] io_schedule_timeout+0xa4/0x110 [13706.658826] [] bit_wait_io+0x36/0x50 [13706.658828] [] __wait_on_bit_lock+0x76/0xb0 [13706.658831] [] ? lock_request+0x46/0x70 [fuse] [13706.658834] [] __lock_page+0xaa/0xb0 [13706.658836] [] ? wake_atomic_t_function+0x40/0x40 [13706.658838] [] set_page_dirty_lock+0x58/0x60 [13706.658841] [] fuse_release_user_pages+0x58/0x70 [fuse] [13706.658844] [] ? fuse_aio_complete+0x190/0x190 [fuse] [13706.658847] [] fuse_aio_complete_req+0x29/0x90 [fuse] [13706.658849] [] request_end+0xd9/0x190 [fuse] [13706.658852] [] fuse_dev_do_write+0x336/0x490 [fuse] [13706.658854] [] fuse_dev_write+0x6e/0xa0 [fuse] [13706.658857] [] ? security_file_permission+0x23/0x90 [13706.658859] [] do_iter_readv_writev+0x60/0x90 [13706.658862] [] ? fuse_dev_splice_write+0x350/0x350 [fuse] [13706.658863] [] do_readv_writev+0x171/0x1f0 [13706.658866] [] ? try_to_wake_up+0x210/0x210 [13706.658868] [] vfs_writev+0x41/0x50 [13706.658870] [] SyS_writev+0x56/0xf0 [13706.658872] [] ? syscall_trace_leave+0xf1/0x160 [13706.658874] [] system_call_fastpath+0x12/0x71 Fix this by making should_dirty a fuse_io_priv parameter that can be checked in fuse_aio_complete_req(). Reported-by: Tiger Yang Signed-off-by: Ashish Samant Signed-off-by: Miklos Szeredi Signed-off-by: Sasha Levin --- fs/fuse/file.c | 6 +++--- fs/fuse/fuse_i.h | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 8577f3ba6dc6..7014318f6d18 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -625,7 +625,7 @@ static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_req *req) struct fuse_io_priv *io = req->io; ssize_t pos = -1; - fuse_release_user_pages(req, !io->write); + fuse_release_user_pages(req, io->should_dirty); if (io->write) { if (req->misc.write.in.size != req->misc.write.out.size) @@ -1333,7 +1333,6 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, loff_t *ppos, int flags) { int write = flags & FUSE_DIO_WRITE; - bool should_dirty = !write && iter_is_iovec(iter); int cuse = flags & FUSE_DIO_CUSE; struct file *file = io->file; struct inode *inode = file->f_mapping->host; @@ -1362,6 +1361,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, mutex_unlock(&inode->i_mutex); } + io->should_dirty = !write && iter_is_iovec(iter); while (count) { size_t nres; fl_owner_t owner = current->files; @@ -1378,7 +1378,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, nres = fuse_send_read(req, io, pos, nbytes, owner); if (!io->async) - fuse_release_user_pages(req, should_dirty); + fuse_release_user_pages(req, io->should_dirty); if (req->out.h.error) { if (!res) res = req->out.h.error; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 7aafe9acc6c0..c6eb35a95fcc 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -252,6 +252,7 @@ struct fuse_io_priv { size_t size; __u64 offset; bool write; + bool should_dirty; int err; struct kiocb *iocb; struct file *file; From c97383a60e49db65a88c864aa0280d491e299e36 Mon Sep 17 00:00:00 2001 From: William Dauchy Date: Fri, 30 Oct 2015 18:16:30 +0100 Subject: [PATCH 126/189] ixgbevf: Fix handling of NAPI budget when multiple queues are enabled per vector [ Upstream commit d0f71afffa1c3d5a36a4a278f1dbbd2643176dc3 ] This is the same patch as for ixgbe but applied differently according to busy polling. See commit 5d6002b7b822c74 ("ixgbe: Fix handling of NAPI budget when multiple queues are enabled per vector") Signed-off-by: William Dauchy Tested-by: Phil Schmitt Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 50bbad37d640..723bda33472a 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1014,6 +1014,8 @@ static int ixgbevf_poll(struct napi_struct *napi, int budget) ixgbevf_for_each_ring(ring, q_vector->tx) clean_complete &= ixgbevf_clean_tx_irq(q_vector, ring); + if (budget <= 0) + return budget; #ifdef CONFIG_NET_RX_BUSY_POLL if (!ixgbevf_qv_lock_napi(q_vector)) return budget; From 40e9abaac6531e665cf57e4d456774239264a7cd Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 29 Feb 2016 21:17:10 +0200 Subject: [PATCH 127/189] net/mlx5e: Fix LRO modify [ Upstream commit ab0394fe2c258fdb5086c51a251b28f8ee7ab35c ] Ethtool LRO enable/disable is broken, as of today we only modify TCP TIRs in order to apply the requested configuration. Hardware requires that all TIRs pointing to the same RQ should share the same LRO configuration. For that all other TIRs' LRO fields must be modified as well. Fixes: 5c50368f3831 ('net/mlx5e: Light-weight netdev open/stop') Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 90e876ecc720..765b069d6a90 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1304,7 +1304,7 @@ static void mlx5e_build_tir_ctx_lro(void *tirc, struct mlx5e_priv *priv) lro_timer_supported_periods[2])); } -static int mlx5e_modify_tir_lro(struct mlx5e_priv *priv, int tt) +static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; @@ -1312,6 +1312,7 @@ static int mlx5e_modify_tir_lro(struct mlx5e_priv *priv, int tt) void *tirc; int inlen; int err; + int tt; inlen = MLX5_ST_SZ_BYTES(modify_tir_in); in = mlx5_vzalloc(inlen); @@ -1323,7 +1324,11 @@ static int mlx5e_modify_tir_lro(struct mlx5e_priv *priv, int tt) mlx5e_build_tir_ctx_lro(tirc, priv); - err = mlx5_core_modify_tir(mdev, priv->tirn[tt], in, inlen); + for (tt = 0; tt < MLX5E_NUM_TT; tt++) { + err = mlx5_core_modify_tir(mdev, priv->tirn[tt], in, inlen); + if (err) + break; + } kvfree(in); @@ -1870,8 +1875,10 @@ static int mlx5e_set_features(struct net_device *netdev, mlx5e_close_locked(priv->netdev); priv->params.lro_en = !!(features & NETIF_F_LRO); - mlx5e_modify_tir_lro(priv, MLX5E_TT_IPV4_TCP); - mlx5e_modify_tir_lro(priv, MLX5E_TT_IPV6_TCP); + err = mlx5e_modify_tirs_lro(priv); + if (err) + mlx5_core_warn(priv->mdev, "lro modify failed, %d\n", + err); if (was_opened) err = mlx5e_open_locked(priv->netdev); From fcac753bf1563ab131dc602e55890fa6b93f5491 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 29 Feb 2016 21:17:13 +0200 Subject: [PATCH 128/189] net/mlx5e: Correctly handle RSS indirection table when changing number of channels [ Upstream commit 85082dba0a5059c538cfa786d07f5ec5370d22fe ] Upon changing num_channels, reset the RSS indirection table to match the new value. Fixes: 2d75b2bc8a8c ('net/mlx5e: Add ethtool RSS configuration options') Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 ++ .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 15 +++++++++++---- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 22e72bf1ae48..7a716733d9ca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -586,6 +586,8 @@ int mlx5e_redirect_rqt(struct mlx5e_priv *priv, enum mlx5e_rqt_ix rqt_ix); int mlx5e_open_locked(struct net_device *netdev); int mlx5e_close_locked(struct net_device *netdev); +void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, + int num_channels); static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq, struct mlx5e_tx_wqe *wqe, int bf_sz) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 7cc9df717323..7ee301310817 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -385,6 +385,8 @@ static int mlx5e_set_channels(struct net_device *dev, mlx5e_close_locked(dev); priv->params.num_channels = count; + mlx5e_build_default_indir_rqt(priv->params.indirection_rqt, + MLX5E_INDIR_RQT_SIZE, count); if (was_opened) err = mlx5e_open_locked(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 765b069d6a90..26d25ecdca7e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1186,7 +1186,6 @@ static void mlx5e_fill_indir_rqt_rqns(struct mlx5e_priv *priv, void *rqtc) ix = mlx5e_bits_invert(i, MLX5E_LOG_INDIR_RQT_SIZE); ix = priv->params.indirection_rqt[ix]; - ix = ix % priv->params.num_channels; MLX5_SET(rqtc, rqtc, rq_num[i], test_bit(MLX5E_STATE_OPENED, &priv->state) ? priv->channel[ix]->rq.rqn : @@ -1983,12 +1982,20 @@ u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev) 2 /*sizeof(mlx5e_tx_wqe.inline_hdr_start)*/; } +void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, + int num_channels) +{ + int i; + + for (i = 0; i < len; i++) + indirection_rqt[i] = i % num_channels; +} + static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, struct net_device *netdev, int num_channels) { struct mlx5e_priv *priv = netdev_priv(netdev); - int i; priv->params.log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; @@ -2012,8 +2019,8 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, netdev_rss_key_fill(priv->params.toeplitz_hash_key, sizeof(priv->params.toeplitz_hash_key)); - for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) - priv->params.indirection_rqt[i] = i % num_channels; + mlx5e_build_default_indir_rqt(priv->params.indirection_rqt, + MLX5E_INDIR_RQT_SIZE, num_channels); priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; From 83662744dcf2abe632b25c8bac256de69c99c06b Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Fri, 20 Nov 2015 13:02:16 -0800 Subject: [PATCH 129/189] ixgbe: fix RSS limit for X550 [ Upstream commit e9ee3238f8a480bbca58e51d02a93628d7c1f265 ] X550 allows for up to 64 RSS queues, but the driver can have max of 63 (-1 MSIX vector for link). On systems with >= 64 CPUs the driver will set the redirection table for all 64 queues which will result in packets being dropped. Signed-off-by: Emil Tantilov Tested-by: Phil Schmitt Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 1d2174526a4c..18e4e4a69262 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -312,7 +312,7 @@ enum ixgbe_ring_f_enum { }; #define IXGBE_MAX_RSS_INDICES 16 -#define IXGBE_MAX_RSS_INDICES_X550 64 +#define IXGBE_MAX_RSS_INDICES_X550 63 #define IXGBE_MAX_VMDQ_INDICES 64 #define IXGBE_MAX_FDIR_INDICES 63 /* based on q_vector limit */ #define IXGBE_MAX_FCOE_INDICES 8 From 83aacb9cf479031bb8de3d0a3bcf132cf6b158f0 Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Fri, 20 Nov 2015 13:12:17 -0800 Subject: [PATCH 130/189] ixgbe: Correct X550EM_x revision check [ Upstream commit 3ca2b2506ec9a3b1615930a6810d30ec9aba10a1 ] The X550EM_x revision check needs to check a value, not just a bit. Use a mask and check the value. Also remove the redundant check inside the ixgbe_enter_lplu_t_x550em, because it can only be called when both the mac type and revision check pass. Signed-off-by: Mark Rustad Tested-by: Phil Schmitt Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 9 +++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 995f03107eac..04bc4df82fa7 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -3508,7 +3508,7 @@ struct ixgbe_info { #define IXGBE_FUSES0_GROUP(_i) (0x11158 + ((_i) * 4)) #define IXGBE_FUSES0_300MHZ BIT(5) -#define IXGBE_FUSES0_REV1 BIT(6) +#define IXGBE_FUSES0_REV_MASK (3 << 6) #define IXGBE_KRM_PORT_CAR_GEN_CTRL(P) ((P) ? 0x8010 : 0x4010) #define IXGBE_KRM_LINK_CTRL_1(P) ((P) ? 0x820C : 0x420C) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index a75f2e3ce86f..ffd2e74e5638 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -1873,10 +1873,6 @@ static s32 ixgbe_enter_lplu_t_x550em(struct ixgbe_hw *hw) u32 save_autoneg; bool link_up; - /* SW LPLU not required on later HW revisions. */ - if (IXGBE_FUSES0_REV1 & IXGBE_READ_REG(hw, IXGBE_FUSES0_GROUP(0))) - return 0; - /* If blocked by MNG FW, then don't restart AN */ if (ixgbe_check_reset_blocked(hw)) return 0; @@ -2030,8 +2026,9 @@ static s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw) } /* setup SW LPLU only for first revision */ - if (!(IXGBE_FUSES0_REV1 & IXGBE_READ_REG(hw, - IXGBE_FUSES0_GROUP(0)))) + if (hw->mac.type == ixgbe_mac_X550EM_x && + !(IXGBE_READ_REG(hw, IXGBE_FUSES0_GROUP(0)) & + IXGBE_FUSES0_REV_MASK)) phy->ops.enter_lplu = ixgbe_enter_lplu_t_x550em; phy->ops.handle_lasi = ixgbe_handle_lasi_ext_t_x550em; From c21f80e9661b5363638eef93be4bf8ed4fea7577 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 7 Sep 2016 15:45:31 +0200 Subject: [PATCH 131/189] ALSA: timer: Fix zero-division by continue of uninitialized instance [ Upstream commit 9f8a7658bcafb2a7853f7a2eae8a94e87e6e695b ] When a user timer instance is continued without the explicit start beforehand, the system gets eventually zero-division error like: divide error: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN CPU: 1 PID: 27320 Comm: syz-executor Not tainted 4.8.0-rc3-next-20160825+ #8 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff88003c9b2280 task.stack: ffff880027280000 RIP: 0010:[] [< inline >] ktime_divns include/linux/ktime.h:195 RIP: 0010:[] [] snd_hrtimer_callback+0x1bc/0x3c0 sound/core/hrtimer.c:62 Call Trace: [< inline >] __run_hrtimer kernel/time/hrtimer.c:1238 [] __hrtimer_run_queues+0x325/0xe70 kernel/time/hrtimer.c:1302 [] hrtimer_interrupt+0x18b/0x420 kernel/time/hrtimer.c:1336 [] local_apic_timer_interrupt+0x6f/0xe0 arch/x86/kernel/apic/apic.c:933 [] smp_apic_timer_interrupt+0x76/0xa0 arch/x86/kernel/apic/apic.c:957 [] apic_timer_interrupt+0x8c/0xa0 arch/x86/entry/entry_64.S:487 ..... Although a similar issue was spotted and a fix patch was merged in commit [6b760bb2c63a: ALSA: timer: fix division by zero after SNDRV_TIMER_IOCTL_CONTINUE], it seems covering only a part of iceberg. In this patch, we fix the issue a bit more drastically. Basically the continue of an uninitialized timer is supposed to be a fresh start, so we do it for user timers. For the direct snd_timer_continue() call, there is no way to pass the initial tick value, so we kick out for the uninitialized case. Reported-by: Dmitry Vyukov Cc: Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/core/timer.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sound/core/timer.c b/sound/core/timer.c index ef850a99d64a..f989adb98a22 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -35,6 +35,9 @@ #include #include +/* internal flags */ +#define SNDRV_TIMER_IFLG_PAUSED 0x00010000 + #if IS_ENABLED(CONFIG_SND_HRTIMER) #define DEFAULT_TIMER_LIMIT 4 #elif IS_ENABLED(CONFIG_SND_RTCTIMER) @@ -547,6 +550,10 @@ static int snd_timer_stop1(struct snd_timer_instance *timeri, bool stop) } } timeri->flags &= ~(SNDRV_TIMER_IFLG_RUNNING | SNDRV_TIMER_IFLG_START); + if (stop) + timeri->flags &= ~SNDRV_TIMER_IFLG_PAUSED; + else + timeri->flags |= SNDRV_TIMER_IFLG_PAUSED; snd_timer_notify1(timeri, stop ? SNDRV_TIMER_EVENT_STOP : SNDRV_TIMER_EVENT_PAUSE); unlock: @@ -608,6 +615,10 @@ int snd_timer_stop(struct snd_timer_instance *timeri) */ int snd_timer_continue(struct snd_timer_instance *timeri) { + /* timer can continue only after pause */ + if (!(timeri->flags & SNDRV_TIMER_IFLG_PAUSED)) + return -EINVAL; + if (timeri->flags & SNDRV_TIMER_IFLG_SLAVE) return snd_timer_start_slave(timeri, false); else @@ -1837,6 +1848,9 @@ static int snd_timer_user_continue(struct file *file) tu = file->private_data; if (!tu->timeri) return -EBADFD; + /* start timer instead of continue if it's not used before */ + if (!(tu->timeri->flags & SNDRV_TIMER_IFLG_PAUSED)) + return snd_timer_user_start(file); tu->timeri->lost = 0; return (err = snd_timer_continue(tu->timeri)) < 0 ? err : 0; } From 152368987f1844ca5728ccbac5e8d7a7de8a7457 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 30 Sep 2016 11:11:07 +0200 Subject: [PATCH 132/189] vti6: flush x-netns xfrm cache when vti interface is removed [ Upstream commit 7f92083eb58f85ea114d97f65fcbe22be5b0468d ] This is the same fix than commit a5d0dc810abf ("vti: flush x-netns xfrm cache when vti interface is removed") This patch fixes a refcnt problem when a x-netns vti6 interface is removed: unregister_netdevice: waiting for vti6_test to become free. Usage count = 1 Here is a script to reproduce the problem: ip link set dev ntfp2 up ip addr add dev ntfp2 2001::1/64 ip link add vti6_test type vti6 local 2001::1 remote 2001::2 key 1 ip netns add secure ip link set vti6_test netns secure ip netns exec secure ip link set vti6_test up ip netns exec secure ip link s lo up ip netns exec secure ip addr add dev vti6_test 2003::1/64 ip -6 xfrm policy add dir out tmpl src 2001::1 dst 2001::2 proto esp \ mode tunnel mark 1 ip -6 xfrm policy add dir in tmpl src 2001::2 dst 2001::1 proto esp \ mode tunnel mark 1 ip xfrm state add src 2001::1 dst 2001::2 proto esp spi 1 mode tunnel \ enc des3_ede 0x112233445566778811223344556677881122334455667788 mark 1 ip xfrm state add src 2001::2 dst 2001::1 proto esp spi 1 mode tunnel \ enc des3_ede 0x112233445566778811223344556677881122334455667788 mark 1 ip netns exec secure ping6 -c 4 2003::2 ip netns del secure CC: Lance Richardson Signed-off-by: Nicolas Dichtel Acked-by: Lance Richardson Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin --- net/ipv6/ip6_vti.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 60d4052d97a6..51da5987952c 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -1140,6 +1140,33 @@ static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = { .priority = 100, }; +static bool is_vti6_tunnel(const struct net_device *dev) +{ + return dev->netdev_ops == &vti6_netdev_ops; +} + +static int vti6_device_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct ip6_tnl *t = netdev_priv(dev); + + if (!is_vti6_tunnel(dev)) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_DOWN: + if (!net_eq(t->net, dev_net(dev))) + xfrm_garbage_collect(t->net); + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block vti6_notifier_block __read_mostly = { + .notifier_call = vti6_device_event, +}; + /** * vti6_tunnel_init - register protocol and reserve needed resources * @@ -1150,6 +1177,8 @@ static int __init vti6_tunnel_init(void) const char *msg; int err; + register_netdevice_notifier(&vti6_notifier_block); + msg = "tunnel device"; err = register_pernet_device(&vti6_net_ops); if (err < 0) @@ -1182,6 +1211,7 @@ xfrm_proto_ah_failed: xfrm_proto_esp_failed: unregister_pernet_device(&vti6_net_ops); pernet_dev_failed: + unregister_netdevice_notifier(&vti6_notifier_block); pr_err("vti6 init: failed to register %s\n", msg); return err; } @@ -1196,6 +1226,7 @@ static void __exit vti6_tunnel_cleanup(void) xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH); xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); unregister_pernet_device(&vti6_net_ops); + unregister_netdevice_notifier(&vti6_notifier_block); } module_init(vti6_tunnel_init); From 02bc5312bb66a828f21d85af14425fd45eec08f8 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 29 Mar 2016 14:55:22 -0700 Subject: [PATCH 133/189] gro: Allow tunnel stacking in the case of FOU/GUE [ Upstream commit c3483384ee511ee2af40b4076366cd82a6a47b86 ] This patch should fix the issues seen with a recent fix to prevent tunnel-in-tunnel frames from being generated with GRO. The fix itself is correct for now as long as we do not add any devices that support NETIF_F_GSO_GRE_CSUM. When such a device is added it could have the potential to mess things up due to the fact that the outer transport header points to the outer UDP header and not the GRE header as would be expected. Fixes: fac8e0f579695 ("tunnels: Don't apply GRO to multiple layers of encapsulation.") Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/fou.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 08d8ee124538..d83888bc33d3 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -195,6 +195,14 @@ static struct sk_buff **fou_gro_receive(struct sk_buff **head, u8 proto = NAPI_GRO_CB(skb)->proto; const struct net_offload **offloads; + /* We can clear the encap_mark for FOU as we are essentially doing + * one of two possible things. We are either adding an L4 tunnel + * header to the outer L3 tunnel header, or we are are simply + * treating the GRE tunnel header as though it is a UDP protocol + * specific header such as VXLAN or GENEVE. + */ + NAPI_GRO_CB(skb)->encap_mark = 0; + rcu_read_lock(); offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[proto]); @@ -354,6 +362,14 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head, } } + /* We can clear the encap_mark for GUE as we are essentially doing + * one of two possible things. We are either adding an L4 tunnel + * header to the outer L3 tunnel header, or we are are simply + * treating the GRE tunnel header as though it is a UDP protocol + * specific header such as VXLAN or GENEVE. + */ + NAPI_GRO_CB(skb)->encap_mark = 0; + rcu_read_lock(); offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[guehdr->proto_ctype]); From 44656b939099b3381134dfe7c1cf5ccfc08d3272 Mon Sep 17 00:00:00 2001 From: "Peter S. Housel" Date: Mon, 12 Jun 2017 11:46:22 +0100 Subject: [PATCH 134/189] brcmfmac: Fix glom_skb leak in brcmf_sdiod_recv_chain [ Upstream commit 5ea59db8a375216e6c915c5586f556766673b5a7 ] An earlier change to this function (3bdae810721b) fixed a leak in the case of an unsuccessful call to brcmf_sdiod_buffrw(). However, the glom_skb buffer, used for emulating a scattering read, is never used or referenced after its contents are copied into the destination buffers, and therefore always needs to be freed by the end of the function. Fixes: 3bdae810721b ("brcmfmac: Fix glob_skb leak in brcmf_sdiod_recv_chain") Fixes: a413e39a38573 ("brcmfmac: fix brcmf_sdcard_recv_chain() for host without sg support") Cc: stable@vger.kernel.org # 4.9.x- Signed-off-by: Peter S. Housel Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c index 91da67657f81..72e1796c8167 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c @@ -705,7 +705,7 @@ done: int brcmf_sdiod_recv_chain(struct brcmf_sdio_dev *sdiodev, struct sk_buff_head *pktq, uint totlen) { - struct sk_buff *glom_skb; + struct sk_buff *glom_skb = NULL; struct sk_buff *skb; u32 addr = sdiodev->sbwad; int err = 0; @@ -726,10 +726,8 @@ int brcmf_sdiod_recv_chain(struct brcmf_sdio_dev *sdiodev, return -ENOMEM; err = brcmf_sdiod_buffrw(sdiodev, SDIO_FUNC_2, false, addr, glom_skb); - if (err) { - brcmu_pkt_buf_free_skb(glom_skb); + if (err) goto done; - } skb_queue_walk(pktq, skb) { memcpy(skb->data, glom_skb->data, skb->len); @@ -740,6 +738,7 @@ int brcmf_sdiod_recv_chain(struct brcmf_sdio_dev *sdiodev, pktq); done: + brcmu_pkt_buf_free_skb(glom_skb); return err; } From 414fb21bd5e0b72ff76045e3b76cea9629b47ea0 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Tue, 29 Nov 2016 13:09:45 +0100 Subject: [PATCH 135/189] l2tp: hold socket before dropping lock in l2tp_ip{, 6}_recv() [ Upstream commit a3c18422a4b4e108bcf6a2328f48867e1003fd95 ] Socket must be held while under the protection of the l2tp lock; there is no guarantee that sk remains valid after the read_unlock_bh() call. Same issue for l2tp_ip and l2tp_ip6. Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/l2tp/l2tp_ip.c | 11 ++++++----- net/l2tp/l2tp_ip6.c | 11 ++++++----- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 48ab93842322..c7e6098c924e 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -184,14 +184,15 @@ pass_up: read_lock_bh(&l2tp_ip_lock); sk = __l2tp_ip_bind_lookup(net, iph->daddr, 0, tunnel_id); + if (!sk) { + read_unlock_bh(&l2tp_ip_lock); + goto discard; + } + + sock_hold(sk); read_unlock_bh(&l2tp_ip_lock); } - if (sk == NULL) - goto discard; - - sock_hold(sk); - if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_put; diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index bcdab1cba773..5fe0a6f6af3d 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -196,14 +196,15 @@ pass_up: read_lock_bh(&l2tp_ip6_lock); sk = __l2tp_ip6_bind_lookup(&init_net, &iph->daddr, 0, tunnel_id); + if (!sk) { + read_unlock_bh(&l2tp_ip6_lock); + goto discard; + } + + sock_hold(sk); read_unlock_bh(&l2tp_ip6_lock); } - if (sk == NULL) - goto discard; - - sock_hold(sk); - if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_put; From 27e13827973eccb7fcbf3c70832305082aa082c2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 8 Aug 2017 17:42:46 -0500 Subject: [PATCH 136/189] tty: serial: sprd: fix error return code in sprd_probe() [ Upstream commit ec085c5a51b768947ca481f90b66653e36b3c566 ] platform_get_irq() returns an error code, but the sprd_serial driver ignores it and always returns -ENODEV. This is not correct and, prevents -EPROBE_DEFER from being propagated properly. Also, notice that platform_get_irq() no longer returns 0 on error: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af Print and propagate the return value of platform_get_irq on failure. This issue was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/sprd_serial.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/sprd_serial.c b/drivers/tty/serial/sprd_serial.c index 9dbae01d41ce..1e302caaa450 100644 --- a/drivers/tty/serial/sprd_serial.c +++ b/drivers/tty/serial/sprd_serial.c @@ -731,8 +731,8 @@ static int sprd_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); if (irq < 0) { - dev_err(&pdev->dev, "not provide irq resource\n"); - return -ENODEV; + dev_err(&pdev->dev, "not provide irq resource: %d\n", irq); + return irq; } up->irq = irq; From 9954f1884eec0c584136a08290a62d996fd010ea Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 21 Aug 2017 16:49:58 +0200 Subject: [PATCH 137/189] video: fbdev: pxa3xx_gcu: fix error return code in pxa3xx_gcu_probe() [ Upstream commit 7588f1ecc5f0c914e669d8afb6525f47cd1c4355 ] platform_get_irq() returns an error code, but the pxa3xx_gcu driver ignores it and always returns -ENODEV. This is not correct and, prevents -EPROBE_DEFER from being propagated properly. Also, notice that platform_get_irq() no longer returns 0 on error: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af Print and propagate the return value of platform_get_irq on failure. This issue was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Sasha Levin --- drivers/video/fbdev/pxa3xx-gcu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/pxa3xx-gcu.c b/drivers/video/fbdev/pxa3xx-gcu.c index 50bce45e7f3d..933619da1a94 100644 --- a/drivers/video/fbdev/pxa3xx-gcu.c +++ b/drivers/video/fbdev/pxa3xx-gcu.c @@ -626,8 +626,8 @@ static int pxa3xx_gcu_probe(struct platform_device *pdev) /* request the IRQ */ irq = platform_get_irq(pdev, 0); if (irq < 0) { - dev_err(dev, "no IRQ defined\n"); - return -ENODEV; + dev_err(dev, "no IRQ defined: %d\n", irq); + return irq; } ret = devm_request_irq(dev, irq, pxa3xx_gcu_handle_irq, From cc014084cb850df29b19e4bf4419f129c5cfea95 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 31 Aug 2016 13:48:19 -0700 Subject: [PATCH 138/189] sparc64 mm: Fix more TSB sizing issues [ Upstream commit 1e953d846ac015fbfcf09c857e8f893924cb629c ] Commit af1b1a9b36b8 ("sparc64 mm: Fix base TSB sizing when hugetlb pages are used") addressed the difference between hugetlb and THP pages when computing TSB sizes. The following additional issues were also discovered while working with the code. In order to save memory, THP makes use of a huge zero page. This huge zero page does not count against a task's RSS, but it does consume TSB entries. This is similar to hugetlb pages. Therefore, count huge zero page entries in hugetlb_pte_count. Accounting of THP pages is done in the routine set_pmd_at(). Unfortunately, this does not catch the case where a THP page is split. To handle this case, decrement the count in pmdp_invalidate(). pmdp_invalidate is only called when splitting a THP. However, 'sanity checks' are added in case it is ever called for other purposes. A more general issue exists with HPAGE_SIZE accounting. hugetlb_pte_count tracks the number of HPAGE_SIZE (8M) pages. This value is used to size the TSB for HPAGE_SIZE pages. However, each HPAGE_SIZE page consists of two REAL_HPAGE_SIZE (4M) pages. The TSB contains an entry for each REAL_HPAGE_SIZE page. Therefore, the number of REAL_HPAGE_SIZE pages should be used to size the huge page TSB. A new compile time constant REAL_HPAGE_PER_HPAGE is used to multiply hugetlb_pte_count before sizing the TSB. Changes from V1 - Fixed build issue if hugetlb or THP not configured Signed-off-by: Mike Kravetz Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- arch/sparc/include/asm/page_64.h | 1 + arch/sparc/mm/fault_64.c | 1 + arch/sparc/mm/tlb.c | 35 ++++++++++++++++++++++++++++---- arch/sparc/mm/tsb.c | 18 ++++++++++------ 4 files changed, 45 insertions(+), 10 deletions(-) diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h index 8c2a8c937540..c1263fc390db 100644 --- a/arch/sparc/include/asm/page_64.h +++ b/arch/sparc/include/asm/page_64.h @@ -25,6 +25,7 @@ #define HPAGE_MASK (~(HPAGE_SIZE - 1UL)) #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA +#define REAL_HPAGE_PER_HPAGE (_AC(1,UL) << (HPAGE_SHIFT - REAL_HPAGE_SHIFT)) #endif #ifndef __ASSEMBLY__ diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index e15f33715103..b01ec72522cb 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -487,6 +487,7 @@ good_area: tsb_grow(mm, MM_TSB_BASE, mm_rss); #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) mm_rss = mm->context.hugetlb_pte_count + mm->context.thp_pte_count; + mm_rss *= REAL_HPAGE_PER_HPAGE; if (unlikely(mm_rss > mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) { if (mm->context.tsb_block[MM_TSB_HUGE].tsb) diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index 3659d37b4d81..c56a195c9071 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -174,10 +174,25 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, return; if ((pmd_val(pmd) ^ pmd_val(orig)) & _PAGE_PMD_HUGE) { - if (pmd_val(pmd) & _PAGE_PMD_HUGE) - mm->context.thp_pte_count++; - else - mm->context.thp_pte_count--; + /* + * Note that this routine only sets pmds for THP pages. + * Hugetlb pages are handled elsewhere. We need to check + * for huge zero page. Huge zero pages are like hugetlb + * pages in that there is no RSS, but there is the need + * for TSB entries. So, huge zero page counts go into + * hugetlb_pte_count. + */ + if (pmd_val(pmd) & _PAGE_PMD_HUGE) { + if (is_huge_zero_page(pmd_page(pmd))) + mm->context.hugetlb_pte_count++; + else + mm->context.thp_pte_count++; + } else { + if (is_huge_zero_page(pmd_page(orig))) + mm->context.hugetlb_pte_count--; + else + mm->context.thp_pte_count--; + } /* Do not try to allocate the TSB hash table if we * don't have one already. We have various locks held @@ -204,6 +219,9 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, } } +/* + * This routine is only called when splitting a THP + */ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { @@ -213,6 +231,15 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, set_pmd_at(vma->vm_mm, address, pmdp, entry); flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + + /* + * set_pmd_at() will not be called in a way to decrement + * thp_pte_count when splitting a THP, so do it now. + * Sanity check pmd before doing the actual decrement. + */ + if ((pmd_val(entry) & _PAGE_PMD_HUGE) && + !is_huge_zero_page(pmd_page(entry))) + (vma->vm_mm)->context.thp_pte_count--; } void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index 266411291634..84cd593117a6 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c @@ -489,8 +489,10 @@ retry_tsb_alloc: int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { + unsigned long mm_rss = get_mm_rss(mm); #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - unsigned long total_huge_pte_count; + unsigned long saved_hugetlb_pte_count; + unsigned long saved_thp_pte_count; #endif unsigned int i; @@ -503,10 +505,12 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) * will re-increment the counters as the parent PTEs are * copied into the child address space. */ - total_huge_pte_count = mm->context.hugetlb_pte_count + - mm->context.thp_pte_count; + saved_hugetlb_pte_count = mm->context.hugetlb_pte_count; + saved_thp_pte_count = mm->context.thp_pte_count; mm->context.hugetlb_pte_count = 0; mm->context.thp_pte_count = 0; + + mm_rss -= saved_thp_pte_count * (HPAGE_SIZE / PAGE_SIZE); #endif /* copy_mm() copies over the parent's mm_struct before calling @@ -519,11 +523,13 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) /* If this is fork, inherit the parent's TSB size. We would * grow it to that size on the first page fault anyways. */ - tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm)); + tsb_grow(mm, MM_TSB_BASE, mm_rss); #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE) - if (unlikely(total_huge_pte_count)) - tsb_grow(mm, MM_TSB_HUGE, total_huge_pte_count); + if (unlikely(saved_hugetlb_pte_count + saved_thp_pte_count)) + tsb_grow(mm, MM_TSB_HUGE, + (saved_hugetlb_pte_count + saved_thp_pte_count) * + REAL_HPAGE_PER_HPAGE); #endif if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb)) From b99152a593de5df769c889177e731565845aefbd Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 8 Aug 2017 00:08:06 -0500 Subject: [PATCH 139/189] gpu: host1x: fix error return code in host1x_probe() [ Upstream commit 7b2c63de20080c18d0de35b292ad61fc9bc8328e ] platform_get_irq() returns an error code, but the host1x driver ignores it and always returns -ENXIO. This is not correct and, prevents -EPROBE_DEFER from being propagated properly. Notice that platform_get_irq() no longer returns 0 on error: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af Print and propagate the return value of platform_get_irq on failure. This issue was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- drivers/gpu/host1x/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 53d3d1d45b48..ce1b10a2ae85 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -116,8 +116,8 @@ static int host1x_probe(struct platform_device *pdev) syncpt_irq = platform_get_irq(pdev, 0); if (syncpt_irq < 0) { - dev_err(&pdev->dev, "failed to get IRQ\n"); - return -ENXIO; + dev_err(&pdev->dev, "failed to get IRQ: %d\n", syncpt_irq); + return syncpt_irq; } host = devm_kzalloc(&pdev->dev, sizeof(*host), GFP_KERNEL); From 20d01c513f8d00633a0778df60d07d79878f24fc Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 4 Aug 2017 09:47:52 -0700 Subject: [PATCH 140/189] sparc64: Fix exception handling in UltraSPARC-III memcpy. [ Upstream commit 0ede1c401332173ab0693121dc6cde04a4dbf131 ] Mikael Pettersson reported that some test programs in the strace-4.18 testsuite cause an OOPS. After some debugging it turns out that garbage values are returned when an exception occurs, causing the fixup memset() to be run with bogus arguments. The problem is that two of the exception handler stubs write the successfully copied length into the wrong register. Fixes: ee841d0aff64 ("sparc64: Convert U3copy_{from,to}_user to accurate exception reporting.") Reported-by: Mikael Pettersson Tested-by: Mikael Pettersson Reviewed-by: Sam Ravnborg Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- arch/sparc/lib/U3memcpy.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sparc/lib/U3memcpy.S b/arch/sparc/lib/U3memcpy.S index 54f98706b03b..5a8cb37f0a3b 100644 --- a/arch/sparc/lib/U3memcpy.S +++ b/arch/sparc/lib/U3memcpy.S @@ -145,13 +145,13 @@ ENDPROC(U3_retl_o2_plus_GS_plus_0x08) ENTRY(U3_retl_o2_and_7_plus_GS) and %o2, 7, %o2 retl - add %o2, GLOBAL_SPARE, %o2 + add %o2, GLOBAL_SPARE, %o0 ENDPROC(U3_retl_o2_and_7_plus_GS) ENTRY(U3_retl_o2_and_7_plus_GS_plus_8) add GLOBAL_SPARE, 8, GLOBAL_SPARE and %o2, 7, %o2 retl - add %o2, GLOBAL_SPARE, %o2 + add %o2, GLOBAL_SPARE, %o0 ENDPROC(U3_retl_o2_and_7_plus_GS_plus_8) #endif From 7f71f461af4a3e8e29e543d15fca150a2057faf4 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 3 Aug 2017 17:00:01 -0500 Subject: [PATCH 141/189] gpio: msic: fix error return code in platform_msic_gpio_probe() [ Upstream commit ca1f3ae3154ad6d08caa740c99be0d86644a4e44 ] platform_get_irq() returns an error code, but the gpio-msic driver ignores it and always returns -EINVAL. This is not correct, and prevents -EPROBE_DEFER from being propagated properly. Notice that platform_get_irq() no longer returns 0 on error: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af Print and propagate the return value of platform_get_irq on failure. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/gpio/gpio-msic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-msic.c b/drivers/gpio/gpio-msic.c index 22523aae8abe..3abf066f93d3 100644 --- a/drivers/gpio/gpio-msic.c +++ b/drivers/gpio/gpio-msic.c @@ -266,8 +266,8 @@ static int platform_msic_gpio_probe(struct platform_device *pdev) int i; if (irq < 0) { - dev_err(dev, "no IRQ line\n"); - return -EINVAL; + dev_err(dev, "no IRQ line: %d\n", irq); + return irq; } if (!pdata || !pdata->gpio_base) { From 047719209751af5133cf7614f2d6edce495d11e5 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 7 Aug 2017 23:14:23 -0500 Subject: [PATCH 142/189] usb: imx21-hcd: fix error return code in imx21_probe() [ Upstream commit 46edf52d08342b3dc1f9a61c5200ab8b1c0f5a37 ] platform_get_irq() returns an error code, but the imx21-hcd driver ignores it and always returns -ENXIO. This is not correct, and prevents -EPROBE_DEFER from being propagated properly. Notice that platform_get_irq() no longer returns 0 on error: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af Print error message and propagate the return value of platform_get_irq on failure. This issue was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/imx21-hcd.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/imx21-hcd.c b/drivers/usb/host/imx21-hcd.c index f542045dc2a6..e25d72e0527f 100644 --- a/drivers/usb/host/imx21-hcd.c +++ b/drivers/usb/host/imx21-hcd.c @@ -1849,8 +1849,10 @@ static int imx21_probe(struct platform_device *pdev) if (!res) return -ENODEV; irq = platform_get_irq(pdev, 0); - if (irq < 0) - return -ENXIO; + if (irq < 0) { + dev_err(&pdev->dev, "Failed to get IRQ: %d\n", irq); + return irq; + } hcd = usb_create_hcd(&imx21_hc_driver, &pdev->dev, dev_name(&pdev->dev)); From fd5939e73ea78ac33e597de3b7677c3d7dc50e63 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 8 Aug 2017 17:26:13 -0500 Subject: [PATCH 143/189] usb: ehci-omap: fix error return code in ehci_hcd_omap_probe() [ Upstream commit 99dbff202e28ad1dadf55b058bcae7908678e963 ] platform_get_irq() returns an error code, but the ehci-omap driver ignores it and always returns -ENODEV. This is not correct and, prevents -EPROBE_DEFER from being propagated properly. Also, notice that platform_get_irq() no longer returns 0 on error: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af Print and propagate the return value of platform_get_irq on failure. This issue was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/host/ehci-omap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/ehci-omap.c b/drivers/usb/host/ehci-omap.c index a24720beb39d..cccde8217f28 100644 --- a/drivers/usb/host/ehci-omap.c +++ b/drivers/usb/host/ehci-omap.c @@ -130,8 +130,8 @@ static int ehci_hcd_omap_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); if (irq < 0) { - dev_err(dev, "EHCI irq failed\n"); - return -ENODEV; + dev_err(dev, "EHCI irq failed: %d\n", irq); + return irq; } res = platform_get_resource(pdev, IORESOURCE_MEM, 0); From 18747e4caac9c6d7352e2af9c57ccaf0e94776f8 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 7 Aug 2017 23:25:34 -0500 Subject: [PATCH 144/189] usb: dwc3: omap: fix error return code in dwc3_omap_probe() [ Upstream commit 0ae99ecba7928c7bc66cf14d8a88f0b6ec9fc78e ] platform_get_irq() returns an error code, but the dwc3-omap driver ignores it and always returns -EINVAL. This is not correct and, prevents -EPROBE_DEFER from being propagated properly. Notice that platform_get_irq() no longer returns 0 on error: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af Print and propagate the return value of platform_get_irq on failure. This issue was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Felipe Balbi Signed-off-by: Sasha Levin --- drivers/usb/dwc3/dwc3-omap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c index 22e9606d8e08..9078af0ce06c 100644 --- a/drivers/usb/dwc3/dwc3-omap.c +++ b/drivers/usb/dwc3/dwc3-omap.c @@ -469,8 +469,8 @@ static int dwc3_omap_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); if (irq < 0) { - dev_err(dev, "missing IRQ resource\n"); - return -EINVAL; + dev_err(dev, "missing IRQ resource: %d\n", irq); + return irq; } res = platform_get_resource(pdev, IORESOURCE_MEM, 0); From 8025a41779ebae3670dc060ff759be2fa4bcc1f0 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 7 Aug 2017 23:59:44 -0500 Subject: [PATCH 145/189] spi/bcm63xx-hspi: fix error return code in bcm63xx_hsspi_probe() [ Upstream commit 378da4a65f3a0390837b38145bb5d8c2d20c2cf7 ] platform_get_irq() returns an error code, but the spi-bcm63xx-hsspi driver ignores it and always returns -ENXIO. This is not correct and, prevents -EPROBE_DEFER from being propagated properly. Notice that platform_get_irq() no longer returns 0 on error: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af Print and propagate the return value of platform_get_irq on failure. This issue was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-bcm63xx-hsspi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-bcm63xx-hsspi.c b/drivers/spi/spi-bcm63xx-hsspi.c index 55789f7cda92..645f428ad0a2 100644 --- a/drivers/spi/spi-bcm63xx-hsspi.c +++ b/drivers/spi/spi-bcm63xx-hsspi.c @@ -336,8 +336,8 @@ static int bcm63xx_hsspi_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); if (irq < 0) { - dev_err(dev, "no irq\n"); - return -ENXIO; + dev_err(dev, "no irq: %d\n", irq); + return irq; } res_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); From f46b1dc562a56970e0669ecd488ddb71574416f8 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Tue, 8 Aug 2017 13:22:30 +0100 Subject: [PATCH 146/189] MIPS: Handle non word sized instructions when examining frame [ Upstream commit 11887ed172a6960673f130dad8f8fb42778f64d7 ] Commit 34c2f668d0f6b ("MIPS: microMIPS: Add unaligned access support.") added fairly broken support for handling 16bit microMIPS instructions in get_frame_info(). It adjusts the instruction pointer by 16bits in the case of a 16bit sp move instruction, but not any other 16bit instruction. Commit b6c7a324df37 ("MIPS: Fix get_frame_info() handling of microMIPS function size") goes some way to fixing get_frame_info() to iterate over microMIPS instuctions, but the instruction pointer is still manipulated using a postincrement, and is of union mips_instruction type. Since the union is sized to the largest member (a word), but microMIPS instructions are a mix of halfword and word sizes, the function does not always iterate correctly, ending up misaligned with the instruction stream and interpreting it incorrectly. Since the instruction modifying the stack pointer is usually the first in the function, that one is usually handled correctly. But the instruction which saves the return address to the sp is some variable number of instructions into the frame and is frequently missed due to not being on a word boundary, leading to incomplete walking of the stack. Fix this by incrementing the instruction pointer based on the size of the previously decoded instruction (& remove the hack introduced by commit 34c2f668d0f6b ("MIPS: microMIPS: Add unaligned access support.") which adjusts the instruction pointer in the case of a 16bit sp move instruction, but not any other). Fixes: 34c2f668d0f6b ("MIPS: microMIPS: Add unaligned access support.") Signed-off-by: Matt Redfearn Cc: Marcin Nowakowski Cc: James Hogan Cc: Ingo Molnar Cc: Paul Burton Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/16953/ Signed-off-by: Ralf Baechle Signed-off-by: Sasha Levin --- arch/mips/kernel/process.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index ed6cac4a4df0..a9cc74354df8 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -341,6 +341,7 @@ static int get_frame_info(struct mips_frame_info *info) bool is_mmips = IS_ENABLED(CONFIG_CPU_MICROMIPS); union mips_instruction insn, *ip, *ip_end; const unsigned int max_insns = 128; + unsigned int last_insn_size = 0; unsigned int i; info->pc_offset = -1; @@ -352,15 +353,19 @@ static int get_frame_info(struct mips_frame_info *info) ip_end = (void *)ip + info->func_size; - for (i = 0; i < max_insns && ip < ip_end; i++, ip++) { + for (i = 0; i < max_insns && ip < ip_end; i++) { + ip = (void *)ip + last_insn_size; if (is_mmips && mm_insn_16bit(ip->halfword[0])) { insn.halfword[0] = 0; insn.halfword[1] = ip->halfword[0]; + last_insn_size = 2; } else if (is_mmips) { insn.halfword[0] = ip->halfword[1]; insn.halfword[1] = ip->halfword[0]; + last_insn_size = 4; } else { insn.word = ip->word; + last_insn_size = 4; } if (is_jump_ins(&insn)) @@ -382,8 +387,6 @@ static int get_frame_info(struct mips_frame_info *info) tmp = (ip->halfword[0] >> 1); info->frame_size = -(signed short)(tmp & 0xf); } - ip = (void *) &ip->halfword[1]; - ip--; } else #endif info->frame_size = - ip->i_format.simmediate; From 61d2a3e63bc2a5a149fc01c367fd59cc5e5cf9ef Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 7 Aug 2017 23:52:34 -0500 Subject: [PATCH 147/189] spi/bcm63xx: fix error return code in bcm63xx_spi_probe() [ Upstream commit ba8afe94723e9ba665aee9cca649fb2c80f7304c ] platform_get_irq() returns an error code, but the spi-bcm63xx driver ignores it and always returns -ENXIO. This is not correct and, prevents -EPROBE_DEFER from being propagated properly. Notice that platform_get_irq() no longer returns 0 on error: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af Print and propagate the return value of platform_get_irq on failure. This issue was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-bcm63xx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-bcm63xx.c b/drivers/spi/spi-bcm63xx.c index bf9a610e5b89..f14500910bc2 100644 --- a/drivers/spi/spi-bcm63xx.c +++ b/drivers/spi/spi-bcm63xx.c @@ -496,8 +496,8 @@ static int bcm63xx_spi_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); if (irq < 0) { - dev_err(dev, "no irq\n"); - return -ENXIO; + dev_err(dev, "no irq: %d\n", irq); + return irq; } clk = devm_clk_get(dev, "spi"); From 87c7239cde199a612892d3347e95342272e8adc5 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 7 Aug 2017 23:45:02 -0500 Subject: [PATCH 148/189] spi: xlp: fix error return code in xlp_spi_probe() [ Upstream commit 9a6b94796ae6feaf275ec6200e9b2964db208182 ] platform_get_irq() returns an error code, but the spi-xlp driver ignores it and always returns -EINVAL. This is not correct and, prevents -EPROBE_DEFER from being propagated properly. Notice that platform_get_irq() no longer returns 0 on error: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af Print and propagate the return value of platform_get_irq on failure. This issue was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-xlp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-xlp.c b/drivers/spi/spi-xlp.c index 8f04feca6ee3..0ddb0adaa8aa 100644 --- a/drivers/spi/spi-xlp.c +++ b/drivers/spi/spi-xlp.c @@ -392,8 +392,8 @@ static int xlp_spi_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); if (irq < 0) { - dev_err(&pdev->dev, "no IRQ resource found\n"); - return -EINVAL; + dev_err(&pdev->dev, "no IRQ resource found: %d\n", irq); + return irq; } err = devm_request_irq(&pdev->dev, irq, xlp_spi_interrupt, 0, pdev->name, xspi); From dfb3a6a31fd001e89973c53123752c608f0acd07 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 30 Jun 2017 15:43:50 -0500 Subject: [PATCH 149/189] ASoC: spear: fix error return code in spdif_in_probe() [ Upstream commit 27d30400c448264c1ac9434cb836de0c230af213 ] platform_get_irq() returns an error code, but the spdif_in driver ignores it and always returns -EINVAL. This is not correct, and prevents -EPROBE_DEFER from being propagated properly. Notice that platform_get_irq() no longer returns 0 on error: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af Print error message and propagate the return value of platform_get_irq on failure. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/spear/spdif_in.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/soc/spear/spdif_in.c b/sound/soc/spear/spdif_in.c index 977a078eb92f..7f32527fc3c8 100644 --- a/sound/soc/spear/spdif_in.c +++ b/sound/soc/spear/spdif_in.c @@ -223,8 +223,10 @@ static int spdif_in_probe(struct platform_device *pdev) host->io_base = io_base; host->irq = platform_get_irq(pdev, 0); - if (host->irq < 0) - return -EINVAL; + if (host->irq < 0) { + dev_warn(&pdev->dev, "failed to get IRQ: %d\n", host->irq); + return host->irq; + } host->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(host->clk)) From 1bd2b909fd4579dcb5d231ab276e2d2f20b81a82 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 3 Jul 2017 07:47:38 -0500 Subject: [PATCH 150/189] PM / devfreq: tegra: fix error return code in tegra_devfreq_probe() [ Upstream commit 9e578b37505018622dfafc40eed7cd78ff2af221 ] platform_get_irq() returns an error code, but the tegra-devfreq driver ignores it and always returns -ENODEV. This is not correct, and prevents -EPROBE_DEFER from being propagated properly. Notice that platform_get_irq() no longer returns 0 on error: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e330b9a6bb35dc7097a4f02cb1ae7b6f96df92af Print and propagate the return value of platform_get_irq on failure. Reviewed-by: Chanwoo Choi Signed-off-by: Gustavo A. R. Silva Signed-off-by: MyungJoo Ham Signed-off-by: Sasha Levin --- drivers/devfreq/tegra-devfreq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/devfreq/tegra-devfreq.c b/drivers/devfreq/tegra-devfreq.c index 848b93ee930f..64a2e02b87d7 100644 --- a/drivers/devfreq/tegra-devfreq.c +++ b/drivers/devfreq/tegra-devfreq.c @@ -688,9 +688,9 @@ static int tegra_devfreq_probe(struct platform_device *pdev) } irq = platform_get_irq(pdev, 0); - if (irq <= 0) { - dev_err(&pdev->dev, "Failed to get IRQ\n"); - return -ENODEV; + if (irq < 0) { + dev_err(&pdev->dev, "Failed to get IRQ: %d\n", irq); + return irq; } platform_set_drvdata(pdev, tegra); From b1f4be0ee470f7244d85152dbde7f50a23d15c0b Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 27 Apr 2017 19:29:34 +0200 Subject: [PATCH 151/189] bonding: avoid defaulting hard_header_len to ETH_HLEN on slave removal [ Upstream commit 19cdead3e2ef8ed765c5d1ce48057ca9d97b5094 ] On slave list updates, the bonding driver computes its hard_header_len as the maximum of all enslaved devices's hard_header_len. If the slave list is empty, e.g. on last enslaved device removal, ETH_HLEN is used. Since the bonding header_ops are set only when the first enslaved device is attached, the above can lead to header_ops->create() being called with the wrong skb headroom in place. If bond0 is configured on top of ipoib devices, with the following commands: ifup bond0 for slave in $BOND_SLAVES_LIST; do ip link set dev $slave nomaster done ping -c 1 we will obtain a skb_under_panic() with a similar call trace: skb_push+0x3d/0x40 push_pseudo_header+0x17/0x30 [ib_ipoib] ipoib_hard_header+0x4e/0x80 [ib_ipoib] arp_create+0x12f/0x220 arp_send_dst.part.19+0x28/0x50 arp_solicit+0x115/0x290 neigh_probe+0x4d/0x70 __neigh_event_send+0xa7/0x230 neigh_resolve_output+0x12e/0x1c0 ip_finish_output2+0x14b/0x390 ip_finish_output+0x136/0x1e0 ip_output+0x76/0xe0 ip_local_out+0x35/0x40 ip_send_skb+0x19/0x40 ip_push_pending_frames+0x33/0x40 raw_sendmsg+0x7d3/0xb50 inet_sendmsg+0x31/0xb0 sock_sendmsg+0x38/0x50 SYSC_sendto+0x102/0x190 SyS_sendto+0xe/0x10 do_syscall_64+0x67/0x180 entry_SYSCALL64_slow_path+0x25/0x25 This change addresses the issue avoiding updating the bonding device hard_header_len when the slaves list become empty, forbidding to shrink it below the value used by header_ops->create(). The bug is there since commit 54ef31371407 ("[PATCH] bonding: Handle large hard_header_len") but the panic can be triggered only since commit fc791b633515 ("IB/ipoib: move back IB LL address into the hard header"). Reported-by: Norbert P Fixes: 54ef31371407 ("[PATCH] bonding: Handle large hard_header_len") Fixes: fc791b633515 ("IB/ipoib: move back IB LL address into the hard header") Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: Paolo Abeni Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/bonding/bond_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 78da1b7b4d86..a32dcb6718ca 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1107,11 +1107,11 @@ static void bond_compute_features(struct bonding *bond) gso_max_size = min(gso_max_size, slave->dev->gso_max_size); gso_max_segs = min(gso_max_segs, slave->dev->gso_max_segs); } + bond_dev->hard_header_len = max_hard_header_len; done: bond_dev->vlan_features = vlan_features; bond_dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL; - bond_dev->hard_header_len = max_hard_header_len; bond_dev->gso_max_segs = gso_max_segs; netif_set_gso_max_size(bond_dev, gso_max_size); From 83aa7d13df9f9cabc87ce1c70f86c5b35c0f5ead Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 2 Mar 2017 09:21:33 -0800 Subject: [PATCH 152/189] scsi: aacraid: Fix typo in blink status [ Upstream commit 934767c56b0d9dbb95a40e9e6e4d9dcdc3a165ad ] The return status of the adapter check on KERNEL_PANIC is supposed to be the upper 16 bits of the OMR status register. Fixes: c421530bf848604e (scsi: aacraid: Reorder Adpater status check) Reported-by: Dan Carpenter Signed-off-by: Raghava Aditya Renukunta Reviewed-by: Dave Carroll Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/aacraid/src.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c index e415e1c58eb5..cf3ac0654a3a 100644 --- a/drivers/scsi/aacraid/src.c +++ b/drivers/scsi/aacraid/src.c @@ -444,7 +444,7 @@ err_out: return -1; err_blink: - return (status > 16) & 0xFF; + return (status >> 16) & 0xFF; } /** From 0f951e1a4186372583e05850240e861d8ca8b6a2 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Tue, 8 Aug 2017 13:22:33 +0100 Subject: [PATCH 153/189] MIPS: microMIPS: Fix decoding of swsp16 instruction [ Upstream commit cea8cd498f4f1c30ea27e3664b3c671e495c4fce ] When the immediate encoded in the instruction is accessed, it is sign extended due to being a signed value being assigned to a signed integer. The ISA specifies that this operation is an unsigned operation. The sign extension leads us to incorrectly decode: 801e9c8e: cbf1 sw ra,68(sp) As having an immediate of 1073741809. Since the instruction format does not specify signed/unsigned, and this is currently the only location to use this instuction format, change it to an unsigned immediate. Fixes: bb9bc4689b9c ("MIPS: Calculate microMIPS ra properly when unwinding the stack") Suggested-by: Paul Burton Signed-off-by: Matt Redfearn Reviewed-by: James Hogan Cc: Marcin Nowakowski Cc: Miodrag Dinic Cc: Ingo Molnar Cc: David Daney Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/16957/ Signed-off-by: Ralf Baechle Signed-off-by: Sasha Levin --- arch/mips/include/uapi/asm/inst.h | 2 +- arch/mips/kernel/process.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/include/uapi/asm/inst.h b/arch/mips/include/uapi/asm/inst.h index 9b44d5a816fa..1b6f2f219298 100644 --- a/arch/mips/include/uapi/asm/inst.h +++ b/arch/mips/include/uapi/asm/inst.h @@ -846,7 +846,7 @@ struct mm16_r3_format { /* Load from global pointer format */ struct mm16_r5_format { /* Load/store from stack pointer format */ __BITFIELD_FIELD(unsigned int opcode : 6, __BITFIELD_FIELD(unsigned int rt : 5, - __BITFIELD_FIELD(signed int simmediate : 5, + __BITFIELD_FIELD(unsigned int imm : 5, __BITFIELD_FIELD(unsigned int : 16, /* Ignored */ ;)))) }; diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index a9cc74354df8..ebd8a715fe38 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -207,7 +207,7 @@ static inline int is_ra_save_ins(union mips_instruction *ip, int *poff) if (ip->mm16_r5_format.rt != 31) return 0; - *poff = ip->mm16_r5_format.simmediate; + *poff = ip->mm16_r5_format.imm; *poff = (*poff << 2) / sizeof(ulong); return 1; From a5ce9639fc792b1321ef9f88cbbf5e5aeb127adc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20Gr=C3=B6nke?= Date: Tue, 26 Jun 2018 10:12:18 +0000 Subject: [PATCH 154/189] igb: Remove superfluous reset to PHY and page 0 selection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 2a83fba6cae89dd9c0625e68ff8ffff791c67ac0 ] This patch reverts two previous applied patches to fix an issue that appeared when using SGMII based SFP modules. In the current state the driver will try to reset the PHY before obtaining the phy_addr of the SGMII attached PHY. That leads to an error in e1000_write_phy_reg_sgmii_82575. Causing the initialization to fail: igb: Intel(R) Gigabit Ethernet Network Driver - version 5.4.0-k igb: Copyright (c) 2007-2014 Intel Corporation. igb: probe of ????:??:??.? failed with error -3 The patches being reverted are: commit 182785335447957409282ca745aa5bc3968facee Author: Aaron Sierra Date: Tue Nov 29 10:03:56 2016 -0600 igb: reset the PHY before reading the PHY ID commit 440aeca4b9858248d8f16d724d9fa87a4f65fa33 Author: Matwey V Kornilov Date: Thu Nov 24 13:32:48 2016 +0300 igb: Explicitly select page 0 at initialization The first reverted patch directly causes the problem mentioned above. In case of SGMII the phy_addr is not known at this point and will only be obtained by 'igb_get_phy_id_82575' further down in the code. The second removed patch selects forces selection of page 0 in the PHY. Something that the reset tries to address as well. As pointed out by Alexander Duzck, the patch below fixes the same issue but in the proper location: commit 4e684f59d760a2c7c716bb60190783546e2d08a1 Author: Chris J Arges Date: Wed Nov 2 09:13:42 2016 -0500 igb: Workaround for igb i210 firmware issue Reverts: 440aeca4b9858248d8f16d724d9fa87a4f65fa33. Reverts: 182785335447957409282ca745aa5bc3968facee. Signed-off-by: Christian Grönke Reviewed-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igb/e1000_82575.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c index f3f3b95d5512..97bf0c3d5c69 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.c +++ b/drivers/net/ethernet/intel/igb/e1000_82575.c @@ -223,17 +223,6 @@ static s32 igb_init_phy_params_82575(struct e1000_hw *hw) hw->bus.func = (rd32(E1000_STATUS) & E1000_STATUS_FUNC_MASK) >> E1000_STATUS_FUNC_SHIFT; - /* Make sure the PHY is in a good state. Several people have reported - * firmware leaving the PHY's page select register set to something - * other than the default of zero, which causes the PHY ID read to - * access something other than the intended register. - */ - ret_val = hw->phy.ops.reset(hw); - if (ret_val) { - hw_dbg("Error resetting the PHY.\n"); - goto out; - } - /* Set phy->phy_addr and phy->id. */ ret_val = igb_get_phy_id_82575(hw); if (ret_val) From fbba23bb4071123431812a653ff98de7b94fe61e Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Sun, 30 Jul 2017 21:28:15 +0100 Subject: [PATCH 155/189] MIPS: DEC: Fix an int-handler.S CPU_DADDI_WORKAROUNDS regression [ Upstream commit 68fe55680d0f3342969f49412fceabb90bdfadba ] Fix a commit 3021773c7c3e ("MIPS: DEC: Avoid la pseudo-instruction in delay slots") regression and remove assembly errors: arch/mips/dec/int-handler.S: Assembler messages: arch/mips/dec/int-handler.S:162: Error: Macro used $at after ".set noat" arch/mips/dec/int-handler.S:163: Error: Macro used $at after ".set noat" arch/mips/dec/int-handler.S:229: Error: Macro used $at after ".set noat" arch/mips/dec/int-handler.S:230: Error: Macro used $at after ".set noat" triggering with with the CPU_DADDI_WORKAROUNDS option set and the DADDIU instruction. This is because with that option in place the instruction becomes a macro, which expands to an LI/DADDU (or actually ADDIU/DADDU) sequence that uses $at as a temporary register. With CPU_DADDI_WORKAROUNDS we only support `-msym32' compilation though, and this is already enforced in arch/mips/Makefile, so choose the 32-bit expansion variant for the supported configurations and then replace the 64-bit variant with #error just in case. Fixes: 3021773c7c3e ("MIPS: DEC: Avoid la pseudo-instruction in delay slots") Signed-off-by: Maciej W. Rozycki Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # 4.8+ Patchwork: https://patchwork.linux-mips.org/patch/16893/ Signed-off-by: Ralf Baechle Signed-off-by: Sasha Levin --- arch/mips/dec/int-handler.S | 34 ++++++---------------------------- 1 file changed, 6 insertions(+), 28 deletions(-) diff --git a/arch/mips/dec/int-handler.S b/arch/mips/dec/int-handler.S index 554d1da97743..21f4a9fe82fa 100644 --- a/arch/mips/dec/int-handler.S +++ b/arch/mips/dec/int-handler.S @@ -147,23 +147,12 @@ * Find irq with highest priority */ # open coded PTR_LA t1, cpu_mask_nr_tbl -#if (_MIPS_SZPTR == 32) +#if defined(CONFIG_32BIT) || defined(KBUILD_64BIT_SYM32) # open coded la t1, cpu_mask_nr_tbl lui t1, %hi(cpu_mask_nr_tbl) addiu t1, %lo(cpu_mask_nr_tbl) - -#endif -#if (_MIPS_SZPTR == 64) - # open coded dla t1, cpu_mask_nr_tbl - .set push - .set noat - lui t1, %highest(cpu_mask_nr_tbl) - lui AT, %hi(cpu_mask_nr_tbl) - daddiu t1, t1, %higher(cpu_mask_nr_tbl) - daddiu AT, AT, %lo(cpu_mask_nr_tbl) - dsll t1, 32 - daddu t1, t1, AT - .set pop +#else +#error GCC `-msym32' option required for 64-bit DECstation builds #endif 1: lw t2,(t1) nop @@ -214,23 +203,12 @@ * Find irq with highest priority */ # open coded PTR_LA t1,asic_mask_nr_tbl -#if (_MIPS_SZPTR == 32) +#if defined(CONFIG_32BIT) || defined(KBUILD_64BIT_SYM32) # open coded la t1, asic_mask_nr_tbl lui t1, %hi(asic_mask_nr_tbl) addiu t1, %lo(asic_mask_nr_tbl) - -#endif -#if (_MIPS_SZPTR == 64) - # open coded dla t1, asic_mask_nr_tbl - .set push - .set noat - lui t1, %highest(asic_mask_nr_tbl) - lui AT, %hi(asic_mask_nr_tbl) - daddiu t1, t1, %higher(asic_mask_nr_tbl) - daddiu AT, AT, %lo(asic_mask_nr_tbl) - dsll t1, 32 - daddu t1, t1, AT - .set pop +#else +#error GCC `-msym32' option required for 64-bit DECstation builds #endif 2: lw t2,(t1) nop From 55df64d96155659e06c6c4ae2126f23b08cfe8e2 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Wed, 12 Sep 2018 08:23:01 +0200 Subject: [PATCH 156/189] ARM: dts: imx53-qsb: disable 1.2GHz OPP [ Upstream commit eea96566c189c77e5272585984eb2729881a2f1d ] The maximum CPU frequency for the i.MX53 QSB is 1GHz, so disable the 1.2GHz OPP. This makes the board work again with configs that have cpufreq enabled like imx_v6_v7_defconfig on which the board stopped working with the addition of cpufreq-dt support. Fixes: 791f416608 ("ARM: dts: imx53: add cpufreq-dt support") Signed-off-by: Sascha Hauer Signed-off-by: Shawn Guo Signed-off-by: Sasha Levin --- arch/arm/boot/dts/imx53-qsb-common.dtsi | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/arm/boot/dts/imx53-qsb-common.dtsi b/arch/arm/boot/dts/imx53-qsb-common.dtsi index 53fd75c8ffcf..47894b41e4e2 100644 --- a/arch/arm/boot/dts/imx53-qsb-common.dtsi +++ b/arch/arm/boot/dts/imx53-qsb-common.dtsi @@ -130,6 +130,17 @@ }; }; +&cpu0 { + /* CPU rated to 1GHz, not 1.2GHz as per the default settings */ + operating-points = < + /* kHz uV */ + 166666 850000 + 400000 900000 + 800000 1050000 + 1000000 1200000 + >; +}; + &esdhc1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc1>; From fa2075baa95c1957b477c830415d4d6b72282ddc Mon Sep 17 00:00:00 2001 From: Khazhismel Kumykov Date: Fri, 12 Oct 2018 21:34:40 -0700 Subject: [PATCH 157/189] fs/fat/fatent.c: add cond_resched() to fat_count_free_clusters() [ Upstream commit ac081c3be3fae6d0cc3e1862507fca3862d30b67 ] On non-preempt kernels this loop can take a long time (more than 50 ticks) processing through entries. Link: http://lkml.kernel.org/r/20181010172623.57033-1-khazhy@google.com Signed-off-by: Khazhismel Kumykov Acked-by: OGAWA Hirofumi Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- fs/fat/fatent.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index a70e37c47a78..e3fc477728b3 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -681,6 +681,7 @@ int fat_count_free_clusters(struct super_block *sb) if (ops->ent_get(&fatent) == FAT_ENT_FREE) free++; } while (fat_ent_next(sbi, &fatent)); + cond_resched(); } sbi->free_clusters = free; sbi->free_clus_valid = 1; From 25e2243ad203b1afa12ce15004a4e67f287d9137 Mon Sep 17 00:00:00 2001 From: Kimmo Rautkoski Date: Sat, 20 Oct 2018 22:44:42 +0300 Subject: [PATCH 158/189] mtd: spi-nor: Add support for is25wp series chips [ Upstream commit d616f81cdd2a21edfa90a595a4e9b143f5ba8414 ] Added support for is25wp032, is25wp064 and is25wp128. Signed-off-by: Kimmo Rautkoski Reviewed-by: Marek Vasut Signed-off-by: Boris Brezillon [ Adrian Bunk: Trivial adaption to changed context. ] Signed-off-by: Adrian Bunk Signed-off-by: Sasha Levin --- drivers/mtd/spi-nor/spi-nor.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 64d6f053c2a5..276998ea0267 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -708,6 +708,12 @@ static const struct flash_info spi_nor_ids[] = { /* ISSI */ { "is25cd512", INFO(0x7f9d20, 0, 32 * 1024, 2, SECT_4K) }, + { "is25wp032", INFO(0x9d7016, 0, 64 * 1024, 64, + SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) }, + { "is25wp064", INFO(0x9d7017, 0, 64 * 1024, 128, + SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) }, + { "is25wp128", INFO(0x9d7018, 0, 64 * 1024, 256, + SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) }, /* Macronix */ { "mx25l512e", INFO(0xc22010, 0, 64 * 1024, 1, SECT_4K) }, From 6f52608e74f3712b20d6e1c24a84d79ea77e4e34 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 5 Jul 2018 15:15:27 +0200 Subject: [PATCH 159/189] perf tools: Disable parallelism for 'make clean' [ Upstream commit da15fc2fa9c07b23db8f5e479bd8a9f0d741ca07 ] The Yocto build system does a 'make clean' when rebuilding due to changed dependencies, and that consistently fails for me (causing the whole BSP build to fail) with errors such as | find: '[...]/perf/1.0-r9/perf-1.0/plugin_mac80211.so': No such file or directory | find: '[...]/perf/1.0-r9/perf-1.0/plugin_mac80211.so': No such file or directory | find: find: '[...]/perf/1.0-r9/perf-1.0/libtraceevent.a''[...]/perf/1.0-r9/perf-1.0/libtraceevent.a': No such file or directory: No such file or directory | [...] | find: cannot delete '/mnt/xfs/devel/pil/yocto/tmp-glibc/work/wandboard-oe-linux-gnueabi/perf/1.0-r9/perf-1.0/util/.pstack.o.cmd': No such file or directory Apparently (despite the comment), 'make clean' ends up launching multiple sub-makes that all want to remove the same things - perhaps this only happens in combination with a O=... parameter. In any case, we don't lose much by explicitly disabling the parallelism for the clean target, and it makes automated builds much more reliable. Signed-off-by: Rasmus Villemoes Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180705131527.19749-1-linux@rasmusvillemoes.dk Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index dcd9a70c7193..55933b2eb932 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -69,10 +69,10 @@ all tags TAGS: $(make) # -# The clean target is not really parallel, don't print the jobs info: +# Explicitly disable parallelism for the clean target. # clean: - $(make) + $(make) -j1 # # The build-test target is not really parallel, don't print the jobs info: From a1519237965f31b5780c8f63e6c3649600fb5754 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 26 Oct 2018 10:28:43 +0800 Subject: [PATCH 160/189] bridge: do not add port to router list when receives query with source 0.0.0.0 commit 5a2de63fd1a59c30c02526d427bc014b98adf508 upstream. Based on RFC 4541, 2.1.1. IGMP Forwarding Rules The switch supporting IGMP snooping must maintain a list of multicast routers and the ports on which they are attached. This list can be constructed in any combination of the following ways: a) This list should be built by the snooping switch sending Multicast Router Solicitation messages as described in IGMP Multicast Router Discovery [MRDISC]. It may also snoop Multicast Router Advertisement messages sent by and to other nodes. b) The arrival port for IGMP Queries (sent by multicast routers) where the source address is not 0.0.0.0. We should not add the port to router list when receives query with source 0.0.0.0. Reported-by: Ying Xu Signed-off-by: Hangbin Liu Acked-by: Nikolay Aleksandrov Acked-by: Roopa Prabhu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_multicast.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index d80c15d028fe..7a6d053f76e2 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1261,7 +1261,15 @@ static void br_multicast_query_received(struct net_bridge *br, return; br_multicast_update_query_timer(br, query, max_delay); - br_multicast_mark_router(br, port); + + /* Based on RFC4541, section 2.1.1 IGMP Forwarding Rules, + * the arrival port for IGMP Queries where the source address + * is 0.0.0.0 should not be added to router port list. + */ + if ((saddr->proto == htons(ETH_P_IP) && saddr->u.ip4) || + (saddr->proto == htons(ETH_P_IPV6) && + !ipv6_addr_any(&saddr->u.ip6))) + br_multicast_mark_router(br, port); } static int br_ip4_multicast_query(struct net_bridge *br, From 4100be3e8177049f5bf40769a18f708fc2c3a9b7 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sat, 27 Oct 2018 12:07:47 +0300 Subject: [PATCH 161/189] net: bridge: remove ipv6 zero address check in mcast queries commit 0fe5119e267f3e3d8ac206895f5922195ec55a8a upstream. Recently a check was added which prevents marking of routers with zero source address, but for IPv6 that cannot happen as the relevant RFCs actually forbid such packets: RFC 2710 (MLDv1): "To be valid, the Query message MUST come from a link-local IPv6 Source Address, be at least 24 octets long, and have a correct MLD checksum." Same goes for RFC 3810. And also it can be seen as a requirement in ipv6_mc_check_mld_query() which is used by the bridge to validate the message before processing it. Thus any queries with :: source address won't be processed anyway. So just remove the check for zero IPv6 source address from the query processing function. Fixes: 5a2de63fd1a5 ("bridge: do not add port to router list when receives query with source 0.0.0.0") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller Cc: Hangbin Liu Signed-off-by: Greg Kroah-Hartman --- net/bridge/br_multicast.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 7a6d053f76e2..270d9c9a5331 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1267,8 +1267,7 @@ static void br_multicast_query_received(struct net_bridge *br, * is 0.0.0.0 should not be added to router port list. */ if ((saddr->proto == htons(ETH_P_IP) && saddr->u.ip4) || - (saddr->proto == htons(ETH_P_IPV6) && - !ipv6_addr_any(&saddr->u.ip6))) + saddr->proto == htons(ETH_P_IPV6)) br_multicast_mark_router(br, port); } From ef1cb6b06b39482a043d3c06a53e88c689c9463d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 12 Oct 2018 18:58:53 -0700 Subject: [PATCH 162/189] ipv6: mcast: fix a use-after-free in inet6_mc_check [ Upstream commit dc012f3628eaecfb5ba68404a5c30ef501daf63d ] syzbot found a use-after-free in inet6_mc_check [1] The problem here is that inet6_mc_check() uses rcu and read_lock(&iml->sflock) So the fact that ip6_mc_leave_src() is called under RTNL and the socket lock does not help us, we need to acquire iml->sflock in write mode. In the future, we should convert all this stuff to RCU. [1] BUG: KASAN: use-after-free in ipv6_addr_equal include/net/ipv6.h:521 [inline] BUG: KASAN: use-after-free in inet6_mc_check+0xae7/0xb40 net/ipv6/mcast.c:649 Read of size 8 at addr ffff8801ce7f2510 by task syz-executor0/22432 CPU: 1 PID: 22432 Comm: syz-executor0 Not tainted 4.19.0-rc7+ #280 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c4/0x2b4 lib/dump_stack.c:113 print_address_description.cold.8+0x9/0x1ff mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report.cold.9+0x242/0x309 mm/kasan/report.c:412 __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433 ipv6_addr_equal include/net/ipv6.h:521 [inline] inet6_mc_check+0xae7/0xb40 net/ipv6/mcast.c:649 __raw_v6_lookup+0x320/0x3f0 net/ipv6/raw.c:98 ipv6_raw_deliver net/ipv6/raw.c:183 [inline] raw6_local_deliver+0x3d3/0xcb0 net/ipv6/raw.c:240 ip6_input_finish+0x467/0x1aa0 net/ipv6/ip6_input.c:345 NF_HOOK include/linux/netfilter.h:289 [inline] ip6_input+0xe9/0x600 net/ipv6/ip6_input.c:426 ip6_mc_input+0x48a/0xd20 net/ipv6/ip6_input.c:503 dst_input include/net/dst.h:450 [inline] ip6_rcv_finish+0x17a/0x330 net/ipv6/ip6_input.c:76 NF_HOOK include/linux/netfilter.h:289 [inline] ipv6_rcv+0x120/0x640 net/ipv6/ip6_input.c:271 __netif_receive_skb_one_core+0x14d/0x200 net/core/dev.c:4913 __netif_receive_skb+0x2c/0x1e0 net/core/dev.c:5023 netif_receive_skb_internal+0x12c/0x620 net/core/dev.c:5126 napi_frags_finish net/core/dev.c:5664 [inline] napi_gro_frags+0x75a/0xc90 net/core/dev.c:5737 tun_get_user+0x3189/0x4250 drivers/net/tun.c:1923 tun_chr_write_iter+0xb9/0x154 drivers/net/tun.c:1968 call_write_iter include/linux/fs.h:1808 [inline] do_iter_readv_writev+0x8b0/0xa80 fs/read_write.c:680 do_iter_write+0x185/0x5f0 fs/read_write.c:959 vfs_writev+0x1f1/0x360 fs/read_write.c:1004 do_writev+0x11a/0x310 fs/read_write.c:1039 __do_sys_writev fs/read_write.c:1112 [inline] __se_sys_writev fs/read_write.c:1109 [inline] __x64_sys_writev+0x75/0xb0 fs/read_write.c:1109 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457421 Code: 75 14 b8 14 00 00 00 0f 05 48 3d 01 f0 ff ff 0f 83 34 b5 fb ff c3 48 83 ec 08 e8 1a 2d 00 00 48 89 04 24 b8 14 00 00 00 0f 05 <48> 8b 3c 24 48 89 c2 e8 63 2d 00 00 48 89 d0 48 83 c4 08 48 3d 01 RSP: 002b:00007f2d30ecaba0 EFLAGS: 00000293 ORIG_RAX: 0000000000000014 RAX: ffffffffffffffda RBX: 000000000000003e RCX: 0000000000457421 RDX: 0000000000000001 RSI: 00007f2d30ecabf0 RDI: 00000000000000f0 RBP: 0000000020000500 R08: 00000000000000f0 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000293 R12: 00007f2d30ecb6d4 R13: 00000000004c4890 R14: 00000000004d7b90 R15: 00000000ffffffff Allocated by task 22437: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] kasan_kmalloc+0xc7/0xe0 mm/kasan/kasan.c:553 __do_kmalloc mm/slab.c:3718 [inline] __kmalloc+0x14e/0x760 mm/slab.c:3727 kmalloc include/linux/slab.h:518 [inline] sock_kmalloc+0x15a/0x1f0 net/core/sock.c:1983 ip6_mc_source+0x14dd/0x1960 net/ipv6/mcast.c:427 do_ipv6_setsockopt.isra.9+0x3afb/0x45d0 net/ipv6/ipv6_sockglue.c:743 ipv6_setsockopt+0xbd/0x170 net/ipv6/ipv6_sockglue.c:933 rawv6_setsockopt+0x59/0x140 net/ipv6/raw.c:1069 sock_common_setsockopt+0x9a/0xe0 net/core/sock.c:3038 __sys_setsockopt+0x1ba/0x3c0 net/socket.c:1902 __do_sys_setsockopt net/socket.c:1913 [inline] __se_sys_setsockopt net/socket.c:1910 [inline] __x64_sys_setsockopt+0xbe/0x150 net/socket.c:1910 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 22430: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] __kasan_slab_free+0x102/0x150 mm/kasan/kasan.c:521 kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528 __cache_free mm/slab.c:3498 [inline] kfree+0xcf/0x230 mm/slab.c:3813 __sock_kfree_s net/core/sock.c:2004 [inline] sock_kfree_s+0x29/0x60 net/core/sock.c:2010 ip6_mc_leave_src+0x11a/0x1d0 net/ipv6/mcast.c:2448 __ipv6_sock_mc_close+0x20b/0x4e0 net/ipv6/mcast.c:310 ipv6_sock_mc_close+0x158/0x1d0 net/ipv6/mcast.c:328 inet6_release+0x40/0x70 net/ipv6/af_inet6.c:452 __sock_release+0xd7/0x250 net/socket.c:579 sock_close+0x19/0x20 net/socket.c:1141 __fput+0x385/0xa30 fs/file_table.c:278 ____fput+0x15/0x20 fs/file_table.c:309 task_work_run+0x1e8/0x2a0 kernel/task_work.c:113 tracehook_notify_resume include/linux/tracehook.h:193 [inline] exit_to_usermode_loop+0x318/0x380 arch/x86/entry/common.c:166 prepare_exit_to_usermode arch/x86/entry/common.c:197 [inline] syscall_return_slowpath arch/x86/entry/common.c:268 [inline] do_syscall_64+0x6be/0x820 arch/x86/entry/common.c:293 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff8801ce7f2500 which belongs to the cache kmalloc-192 of size 192 The buggy address is located 16 bytes inside of 192-byte region [ffff8801ce7f2500, ffff8801ce7f25c0) The buggy address belongs to the page: page:ffffea000739fc80 count:1 mapcount:0 mapping:ffff8801da800040 index:0x0 flags: 0x2fffc0000000100(slab) raw: 02fffc0000000100 ffffea0006f6e548 ffffea000737b948 ffff8801da800040 raw: 0000000000000000 ffff8801ce7f2000 0000000100000010 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8801ce7f2400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8801ce7f2480: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc >ffff8801ce7f2500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8801ce7f2580: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc ffff8801ce7f2600: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/mcast.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 091cee551cd9..a5ec9a0cbb80 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2390,17 +2390,17 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, { int err; - /* callers have the socket lock and rtnl lock - * so no other readers or writers of iml or its sflist - */ + write_lock_bh(&iml->sflock); if (!iml->sflist) { /* any-source empty exclude case */ - return ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0); + err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0); + } else { + err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, + iml->sflist->sl_count, iml->sflist->sl_addr, 0); + sock_kfree_s(sk, iml->sflist, IP6_SFLSIZE(iml->sflist->sl_max)); + iml->sflist = NULL; } - err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, - iml->sflist->sl_count, iml->sflist->sl_addr, 0); - sock_kfree_s(sk, iml->sflist, IP6_SFLSIZE(iml->sflist->sl_max)); - iml->sflist = NULL; + write_unlock_bh(&iml->sflock); return err; } From 2647feb650d7d48f85a957621d8404d9e34cccdb Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Wed, 24 Oct 2018 14:37:21 +0200 Subject: [PATCH 163/189] ipv6/ndisc: Preserve IPv6 control buffer if protocol error handlers are called [ Upstream commit ee1abcf689353f36d9322231b4320926096bdee0 ] Commit a61bbcf28a8c ("[NET]: Store skb->timestamp as offset to a base timestamp") introduces a neighbour control buffer and zeroes it out in ndisc_rcv(), as ndisc_recv_ns() uses it. Commit f2776ff04722 ("[IPV6]: Fix address/interface handling in UDP and DCCP, according to the scoping architecture.") introduces the usage of the IPv6 control buffer in protocol error handlers (e.g. inet6_iif() in present-day __udp6_lib_err()). Now, with commit b94f1c0904da ("ipv6: Use icmpv6_notify() to propagate redirect, instead of rt6_redirect()."), we call protocol error handlers from ndisc_redirect_rcv(), after the control buffer is already stolen and some parts are already zeroed out. This implies that inet6_iif() on this path will always return zero. This gives unexpected results on UDP socket lookup in __udp6_lib_err(), as we might actually need to match sockets for a given interface. Instead of always claiming the control buffer in ndisc_rcv(), do that only when needed. Fixes: b94f1c0904da ("ipv6: Use icmpv6_notify() to propagate redirect, instead of rt6_redirect().") Signed-off-by: Stefano Brivio Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ndisc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 3db8d7d1a986..0bf375177a9a 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1649,10 +1649,9 @@ int ndisc_rcv(struct sk_buff *skb) return 0; } - memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); - switch (msg->icmph.icmp6_type) { case NDISC_NEIGHBOUR_SOLICITATION: + memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); ndisc_recv_ns(skb); break; From 97749034bdb4aa8dea0ff7b08f9e08823a535ebb Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 19 Oct 2018 10:00:19 -0700 Subject: [PATCH 164/189] net/ipv6: Fix index counter for unicast addresses in in6_dump_addrs [ Upstream commit 4ba4c566ba8448a05e6257e0b98a21f1a0d55315 ] The loop wants to skip previously dumped addresses, so loops until current index >= saved index. If the message fills it wants to save the index for the next address to dump - ie., the one that did not fit in the current message. Currently, it is incrementing the index counter before comparing to the saved index, and then the saved index is off by 1 - it assumes the current address is going to fit in the message. Change the index handling to increment only after a succesful dump. Fixes: 502a2ffd7376a ("ipv6: convert idev_list to list macros") Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/addrconf.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 582e757e5727..4dde1e0e7d37 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4439,8 +4439,8 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, /* unicast address incl. temp addr */ list_for_each_entry(ifa, &idev->addr_list, if_list) { - if (++ip_idx < s_ip_idx) - continue; + if (ip_idx < s_ip_idx) + goto next; err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, @@ -4449,6 +4449,8 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, if (err < 0) break; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); +next: + ip_idx++; } break; } From 7fc851d0f813c939e72f26d37bd8cd876abc8241 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 26 Oct 2018 15:51:06 -0700 Subject: [PATCH 165/189] net: sched: gred: pass the right attribute to gred_change_table_def() [ Upstream commit 38b4f18d56372e1e21771ab7b0357b853330186c ] gred_change_table_def() takes a pointer to TCA_GRED_DPS attribute, and expects it will be able to interpret its contents as struct tc_gred_sopt. Pass the correct gred attribute, instead of TCA_OPTIONS. This bug meant the table definition could never be changed after Qdisc was initialized (unless whatever TCA_OPTIONS contained both passed netlink validation and was a valid struct tc_gred_sopt...). Old behaviour: $ ip link add type dummy $ tc qdisc replace dev dummy0 parent root handle 7: \ gred setup vqs 4 default 0 $ tc qdisc replace dev dummy0 parent root handle 7: \ gred setup vqs 4 default 0 RTNETLINK answers: Invalid argument Now: $ ip link add type dummy $ tc qdisc replace dev dummy0 parent root handle 7: \ gred setup vqs 4 default 0 $ tc qdisc replace dev dummy0 parent root handle 7: \ gred setup vqs 4 default 0 $ tc qdisc replace dev dummy0 parent root handle 7: \ gred setup vqs 4 default 0 Fixes: f62d6b936df5 ("[PKT_SCHED]: GRED: Use central VQ change procedure") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_gred.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index f9e8deeeac96..a5745cb2d014 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -444,7 +444,7 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt) if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) { if (tb[TCA_GRED_LIMIT] != NULL) sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]); - return gred_change_table_def(sch, opt); + return gred_change_table_def(sch, tb[TCA_GRED_DPS]); } if (tb[TCA_GRED_PARMS] == NULL || From 98528072a2a9f7936bb2cbda6bf0f34545a45adf Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Thu, 18 Oct 2018 09:36:46 -0500 Subject: [PATCH 166/189] net: socket: fix a missing-check bug [ Upstream commit b6168562c8ce2bd5a30e213021650422e08764dc ] In ethtool_ioctl(), the ioctl command 'ethcmd' is checked through a switch statement to see whether it is necessary to pre-process the ethtool structure, because, as mentioned in the comment, the structure ethtool_rxnfc is defined with padding. If yes, a user-space buffer 'rxnfc' is allocated through compat_alloc_user_space(). One thing to note here is that, if 'ethcmd' is ETHTOOL_GRXCLSRLALL, the size of the buffer 'rxnfc' is partially determined by 'rule_cnt', which is actually acquired from the user-space buffer 'compat_rxnfc', i.e., 'compat_rxnfc->rule_cnt', through get_user(). After 'rxnfc' is allocated, the data in the original user-space buffer 'compat_rxnfc' is then copied to 'rxnfc' through copy_in_user(), including the 'rule_cnt' field. However, after this copy, no check is re-enforced on 'rxnfc->rule_cnt'. So it is possible that a malicious user race to change the value in the 'compat_rxnfc->rule_cnt' between these two copies. Through this way, the attacker can bypass the previous check on 'rule_cnt' and inject malicious data. This can cause undefined behavior of the kernel and introduce potential security risk. This patch avoids the above issue via copying the value acquired by get_user() to 'rxnfc->rule_cn', if 'ethcmd' is ETHTOOL_GRXCLSRLALL. Signed-off-by: Wenwen Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/socket.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/socket.c b/net/socket.c index 0c544ae48eac..96133777d17c 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2760,9 +2760,14 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) copy_in_user(&rxnfc->fs.ring_cookie, &compat_rxnfc->fs.ring_cookie, (void __user *)(&rxnfc->fs.location + 1) - - (void __user *)&rxnfc->fs.ring_cookie) || - copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt, - sizeof(rxnfc->rule_cnt))) + (void __user *)&rxnfc->fs.ring_cookie)) + return -EFAULT; + if (ethcmd == ETHTOOL_GRXCLSRLALL) { + if (put_user(rule_cnt, &rxnfc->rule_cnt)) + return -EFAULT; + } else if (copy_in_user(&rxnfc->rule_cnt, + &compat_rxnfc->rule_cnt, + sizeof(rxnfc->rule_cnt))) return -EFAULT; } From d5df0bdcceba33e121fe338dd85db9f86cedd1f4 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Wed, 31 Oct 2018 16:08:10 +0100 Subject: [PATCH 167/189] net: stmmac: Fix stmmac_mdio_reset() when building stmmac as modules [ Upstream commit 30549aab146ccb1275230c3b4b4bc6b4181fd54e ] When building stmmac, it is only possible to select CONFIG_DWMAC_GENERIC, or any of the glue drivers, when CONFIG_STMMAC_PLATFORM is set. The only exception is CONFIG_STMMAC_PCI. When calling of_mdiobus_register(), it will call our ->reset() callback, which is set to stmmac_mdio_reset(). Most of the code in stmmac_mdio_reset() is protected by a "#if defined(CONFIG_STMMAC_PLATFORM)", which will evaluate to false when CONFIG_STMMAC_PLATFORM=m. Because of this, the phy reset gpio will only be pulled when stmmac is built as built-in, but not when built as modules. Fix this by using "#if IS_ENABLED()" instead of "#if defined()". Signed-off-by: Niklas Cassel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index bba670c42e37..90d95b3654f5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -130,7 +130,7 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg, */ int stmmac_mdio_reset(struct mii_bus *bus) { -#if defined(CONFIG_STMMAC_PLATFORM) +#if IS_ENABLED(CONFIG_STMMAC_PLATFORM) struct net_device *ndev = bus->priv; struct stmmac_priv *priv = netdev_priv(ndev); unsigned int mii_address = priv->hw->mii.addr; From 104ed9e3d12651ba6a95aad7f229c966386f2f83 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 18 Oct 2018 19:56:01 +0200 Subject: [PATCH 168/189] r8169: fix NAPI handling under high load [ Upstream commit 6b839b6cf9eada30b086effb51e5d6076bafc761 ] rtl_rx() and rtl_tx() are called only if the respective bits are set in the interrupt status register. Under high load NAPI may not be able to process all data (work_done == budget) and it will schedule subsequent calls to the poll callback. rtl_ack_events() however resets the bits in the interrupt status register, therefore subsequent calls to rtl8169_poll() won't call rtl_rx() and rtl_tx() - chip interrupts are still disabled. Fix this by calling rtl_rx() and rtl_tx() independent of the bits set in the interrupt status register. Both functions will detect if there's nothing to do for them. Fixes: da78dbff2e05 ("r8169: remove work from irq handler.") Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/r8169.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index c6782ebd35e1..93543e176829 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -7540,17 +7540,15 @@ static int rtl8169_poll(struct napi_struct *napi, int budget) struct rtl8169_private *tp = container_of(napi, struct rtl8169_private, napi); struct net_device *dev = tp->dev; u16 enable_mask = RTL_EVENT_NAPI | tp->event_slow; - int work_done= 0; + int work_done; u16 status; status = rtl_get_events(tp); rtl_ack_events(tp, status & ~tp->event_slow); - if (status & RTL_EVENT_NAPI_RX) - work_done = rtl_rx(dev, tp, (u32) budget); + work_done = rtl_rx(dev, tp, (u32) budget); - if (status & RTL_EVENT_NAPI_TX) - rtl_tx(dev, tp); + rtl_tx(dev, tp); if (status & tp->event_slow) { enable_mask &= ~tp->event_slow; From fee37f15abcc273766186e13c2ff0338f14035cd Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 16 Oct 2018 15:18:17 -0300 Subject: [PATCH 169/189] sctp: fix race on sctp_id2asoc [ Upstream commit b336decab22158937975293aea79396525f92bb3 ] syzbot reported an use-after-free involving sctp_id2asoc. Dmitry Vyukov helped to root cause it and it is because of reading the asoc after it was freed: CPU 1 CPU 2 (working on socket 1) (working on socket 2) sctp_association_destroy sctp_id2asoc spin lock grab the asoc from idr spin unlock spin lock remove asoc from idr spin unlock free(asoc) if asoc->base.sk != sk ... [*] This can only be hit if trying to fetch asocs from different sockets. As we have a single IDR for all asocs, in all SCTP sockets, their id is unique on the system. An application can try to send stuff on an id that matches on another socket, and the if in [*] will protect from such usage. But it didn't consider that as that asoc may belong to another socket, it may be freed in parallel (read: under another socket lock). We fix it by moving the checks in [*] into the protected region. This fixes it because the asoc cannot be freed while the lock is held. Reported-by: syzbot+c7dd55d7aec49d48e49a@syzkaller.appspotmail.com Acked-by: Dmitry Vyukov Signed-off-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/socket.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 13c7f42b7040..53f1b33bca4e 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -248,11 +248,10 @@ struct sctp_association *sctp_id2assoc(struct sock *sk, sctp_assoc_t id) spin_lock_bh(&sctp_assocs_id_lock); asoc = (struct sctp_association *)idr_find(&sctp_assocs_id, (int)id); + if (asoc && (asoc->base.sk != sk || asoc->base.dead)) + asoc = NULL; spin_unlock_bh(&sctp_assocs_id_lock); - if (!asoc || (asoc->base.sk != sk) || asoc->base.dead) - return NULL; - return asoc; } From 8de8589c09dc78c5245b73d6b31a615a5aed52b6 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 1 Nov 2018 12:02:37 -0700 Subject: [PATCH 170/189] net: drop skb on failure in ip_check_defrag() [ Upstream commit 7de414a9dd91426318df7b63da024b2b07e53df5 ] Most callers of pskb_trim_rcsum() simply drop the skb when it fails, however, ip_check_defrag() still continues to pass the skb up to stack. This is suspicious. In ip_check_defrag(), after we learn the skb is an IP fragment, passing the skb to callers makes no sense, because callers expect fragments are defrag'ed on success. So, dropping the skb when we can't defrag it is reasonable. Note, prior to commit 88078d98d1bb, this is not a big problem as checksum will be fixed up anyway. After it, the checksum is not correct on failure. Found this during code review. Fixes: 88078d98d1bb ("net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends") Cc: Eric Dumazet Signed-off-by: Cong Wang Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_fragment.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 7057a1b09b5e..72915658a6b1 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -716,10 +716,14 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user) if (ip_is_fragment(&iph)) { skb = skb_share_check(skb, GFP_ATOMIC); if (skb) { - if (!pskb_may_pull(skb, netoff + iph.ihl * 4)) - return skb; - if (pskb_trim_rcsum(skb, netoff + len)) - return skb; + if (!pskb_may_pull(skb, netoff + iph.ihl * 4)) { + kfree_skb(skb); + return NULL; + } + if (pskb_trim_rcsum(skb, netoff + len)) { + kfree_skb(skb); + return NULL; + } memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); if (ip_defrag(net, skb, user)) return NULL; From 628a149b00d1eb16687e61f8d93676db3e49071c Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 30 Oct 2018 14:10:49 +0800 Subject: [PATCH 171/189] vhost: Fix Spectre V1 vulnerability [ Upstream commit ff002269a4ee9c769dbf9365acef633ebcbd6cbe ] The idx in vhost_vring_ioctl() was controlled by userspace, hence a potential exploitation of the Spectre variant 1 vulnerability. Fixing this by sanitizing idx before using it to index d->vqs. Cc: Michael S. Tsirkin Cc: Josh Poimboeuf Cc: Andrea Arcangeli Signed-off-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/vhost.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 675819a1af37..c54d388310f0 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "vhost.h" @@ -748,6 +749,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp) if (idx >= d->nvqs) return -ENOBUFS; + idx = array_index_nospec(idx, d->nvqs); vq = d->vqs[idx]; mutex_lock(&vq->mutex); From abd46fca025ccb8781303a4956e2bf031a7e8c01 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 29 Oct 2018 20:36:43 +0000 Subject: [PATCH 172/189] rtnetlink: Disallow FDB configuration for non-Ethernet device [ Upstream commit da71577545a52be3e0e9225a946e5fd79cfab015 ] When an FDB entry is configured, the address is validated to have the length of an Ethernet address, but the device for which the address is configured can be of any type. The above can result in the use of uninitialized memory when the address is later compared against existing addresses since 'dev->addr_len' is used and it may be greater than ETH_ALEN, as with ip6tnl devices. Fix this by making sure that FDB entries are only configured for Ethernet devices. BUG: KMSAN: uninit-value in memcmp+0x11d/0x180 lib/string.c:863 CPU: 1 PID: 4318 Comm: syz-executor998 Not tainted 4.19.0-rc3+ #49 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x14b/0x190 lib/dump_stack.c:113 kmsan_report+0x183/0x2b0 mm/kmsan/kmsan.c:956 __msan_warning+0x70/0xc0 mm/kmsan/kmsan_instr.c:645 memcmp+0x11d/0x180 lib/string.c:863 dev_uc_add_excl+0x165/0x7b0 net/core/dev_addr_lists.c:464 ndo_dflt_fdb_add net/core/rtnetlink.c:3463 [inline] rtnl_fdb_add+0x1081/0x1270 net/core/rtnetlink.c:3558 rtnetlink_rcv_msg+0xa0b/0x1530 net/core/rtnetlink.c:4715 netlink_rcv_skb+0x36e/0x5f0 net/netlink/af_netlink.c:2454 rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:4733 netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] netlink_unicast+0x1638/0x1720 net/netlink/af_netlink.c:1343 netlink_sendmsg+0x1205/0x1290 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg net/socket.c:631 [inline] ___sys_sendmsg+0xe70/0x1290 net/socket.c:2114 __sys_sendmsg net/socket.c:2152 [inline] __do_sys_sendmsg net/socket.c:2161 [inline] __se_sys_sendmsg+0x2a3/0x3d0 net/socket.c:2159 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2159 do_syscall_64+0xb8/0x100 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 RIP: 0033:0x440ee9 Code: e8 cc ab 02 00 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 bb 0a fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fff6a93b518 EFLAGS: 00000213 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 0000000000440ee9 RDX: 0000000000000000 RSI: 0000000020000240 RDI: 0000000000000003 RBP: 0000000000000000 R08: 00000000004002c8 R09: 00000000004002c8 R10: 00000000004002c8 R11: 0000000000000213 R12: 000000000000b4b0 R13: 0000000000401ec0 R14: 0000000000000000 R15: 0000000000000000 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:256 [inline] kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:181 kmsan_kmalloc+0x98/0x100 mm/kmsan/kmsan_hooks.c:91 kmsan_slab_alloc+0x10/0x20 mm/kmsan/kmsan_hooks.c:100 slab_post_alloc_hook mm/slab.h:446 [inline] slab_alloc_node mm/slub.c:2718 [inline] __kmalloc_node_track_caller+0x9e7/0x1160 mm/slub.c:4351 __kmalloc_reserve net/core/skbuff.c:138 [inline] __alloc_skb+0x2f5/0x9e0 net/core/skbuff.c:206 alloc_skb include/linux/skbuff.h:996 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1189 [inline] netlink_sendmsg+0xb49/0x1290 net/netlink/af_netlink.c:1883 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg net/socket.c:631 [inline] ___sys_sendmsg+0xe70/0x1290 net/socket.c:2114 __sys_sendmsg net/socket.c:2152 [inline] __do_sys_sendmsg net/socket.c:2161 [inline] __se_sys_sendmsg+0x2a3/0x3d0 net/socket.c:2159 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2159 do_syscall_64+0xb8/0x100 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 v2: * Make error message more specific (David) Fixes: 090096bf3db1 ("net: generic fdb support for drivers without ndo_fdb_") Signed-off-by: Ido Schimmel Reported-and-tested-by: syzbot+3a288d5f5530b901310e@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+d53ab4e92a1db04110ff@syzkaller.appspotmail.com Cc: Vlad Yasevich Cc: David Ahern Reviewed-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index f1df04c7d395..d2a46ffe6382 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2734,6 +2734,11 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) return -EINVAL; } + if (dev->type != ARPHRD_ETHER) { + pr_info("PF_BRIDGE: FDB add only supported for Ethernet devices"); + return -EINVAL; + } + addr = nla_data(tb[NDA_LLADDR]); err = fdb_vid_parse(tb[NDA_VLAN], &vid); @@ -2836,6 +2841,11 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) return -EINVAL; } + if (dev->type != ARPHRD_ETHER) { + pr_info("PF_BRIDGE: FDB delete only supported for Ethernet devices"); + return -EINVAL; + } + addr = nla_data(tb[NDA_LLADDR]); err = fdb_vid_parse(tb[NDA_VLAN], &vid); From 2e3ae534fb98c7a6a5cf3e80a190181154328f80 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 2 Nov 2018 13:22:42 +0000 Subject: [PATCH 173/189] mremap: properly flush TLB before releasing the page Commit eb66ae030829605d61fbef1909ce310e29f78821 upstream. This is a backport to stable 4.4.y. Jann Horn points out that our TLB flushing was subtly wrong for the mremap() case. What makes mremap() special is that we don't follow the usual "add page to list of pages to be freed, then flush tlb, and then free pages". No, mremap() obviously just _moves_ the page from one page table location to another. That matters, because mremap() thus doesn't directly control the lifetime of the moved page with a freelist: instead, the lifetime of the page is controlled by the page table locking, that serializes access to the entry. As a result, we need to flush the TLB not just before releasing the lock for the source location (to avoid any concurrent accesses to the entry), but also before we release the destination page table lock (to avoid the TLB being flushed after somebody else has already done something to that page). This also makes the whole "need_flush" logic unnecessary, since we now always end up flushing the TLB for every valid entry. Reported-and-tested-by: Jann Horn Acked-by: Will Deacon Tested-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman [will: backport to 4.4 stable] Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- mm/huge_memory.c | 6 +++++- mm/mremap.c | 21 ++++++++++++++++----- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index c4ea57ee2fd1..465786cd6490 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1511,7 +1511,7 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma, spinlock_t *old_ptl, *new_ptl; int ret = 0; pmd_t pmd; - + bool force_flush = false; struct mm_struct *mm = vma->vm_mm; if ((old_addr & ~HPAGE_PMD_MASK) || @@ -1539,6 +1539,8 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma, if (new_ptl != old_ptl) spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); pmd = pmdp_huge_get_and_clear(mm, old_addr, old_pmd); + if (pmd_present(pmd)) + force_flush = true; VM_BUG_ON(!pmd_none(*new_pmd)); if (pmd_move_must_withdraw(new_ptl, old_ptl)) { @@ -1547,6 +1549,8 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma, pgtable_trans_huge_deposit(mm, new_pmd, pgtable); } set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd)); + if (force_flush) + flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE); if (new_ptl != old_ptl) spin_unlock(new_ptl); spin_unlock(old_ptl); diff --git a/mm/mremap.c b/mm/mremap.c index fe7b7f65f4f4..450b306d473e 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -96,6 +96,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, struct mm_struct *mm = vma->vm_mm; pte_t *old_pte, *new_pte, pte; spinlock_t *old_ptl, *new_ptl; + bool force_flush = false; + unsigned long len = old_end - old_addr; /* * When need_rmap_locks is true, we take the i_mmap_rwsem and anon_vma @@ -143,12 +145,26 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, if (pte_none(*old_pte)) continue; pte = ptep_get_and_clear(mm, old_addr, old_pte); + /* + * If we are remapping a valid PTE, make sure + * to flush TLB before we drop the PTL for the PTE. + * + * NOTE! Both old and new PTL matter: the old one + * for racing with page_mkclean(), the new one to + * make sure the physical page stays valid until + * the TLB entry for the old mapping has been + * flushed. + */ + if (pte_present(pte)) + force_flush = true; pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr); pte = move_soft_dirty_pte(pte); set_pte_at(mm, new_addr, new_pte, pte); } arch_leave_lazy_mmu_mode(); + if (force_flush) + flush_tlb_range(vma, old_end - len, old_end); if (new_ptl != old_ptl) spin_unlock(new_ptl); pte_unmap(new_pte - 1); @@ -168,7 +184,6 @@ unsigned long move_page_tables(struct vm_area_struct *vma, { unsigned long extent, next, old_end; pmd_t *old_pmd, *new_pmd; - bool need_flush = false; unsigned long mmun_start; /* For mmu_notifiers */ unsigned long mmun_end; /* For mmu_notifiers */ @@ -207,7 +222,6 @@ unsigned long move_page_tables(struct vm_area_struct *vma, anon_vma_unlock_write(vma->anon_vma); } if (err > 0) { - need_flush = true; continue; } else if (!err) { split_huge_page_pmd(vma, old_addr, old_pmd); @@ -224,10 +238,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma, extent = LATENCY_LIMIT; move_ptes(vma, old_pmd, old_addr, old_addr + extent, new_vma, new_pmd, new_addr, need_rmap_locks); - need_flush = true; } - if (likely(need_flush)) - flush_tlb_range(vma, old_end-len, old_addr); mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end); From 333de2f4131efde180eba58abe231f4ad35d8451 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Tue, 3 Oct 2017 10:25:22 +0800 Subject: [PATCH 174/189] crypto: shash - Fix a sleep-in-atomic bug in shash_setkey_unaligned [ Upstream commit 9039f3ef446e9ffa200200c934f049add9e58426 ] The SCTP program may sleep under a spinlock, and the function call path is: sctp_generate_t3_rtx_event (acquire the spinlock) sctp_do_sm sctp_side_effects sctp_cmd_interpreter sctp_make_init_ack sctp_pack_cookie crypto_shash_setkey shash_setkey_unaligned kmalloc(GFP_KERNEL) For the same reason, the orinoco driver may sleep in interrupt handler, and the function call path is: orinoco_rx_isr_tasklet orinoco_rx orinoco_mic crypto_shash_setkey shash_setkey_unaligned kmalloc(GFP_KERNEL) To fix it, GFP_KERNEL is replaced with GFP_ATOMIC. This bug is found by my static analysis tool and my code review. Signed-off-by: Jia-Ju Bai Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- crypto/shash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/shash.c b/crypto/shash.c index 5444b429e35d..4f89f78031e2 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -41,7 +41,7 @@ static int shash_setkey_unaligned(struct crypto_shash *tfm, const u8 *key, int err; absize = keylen + (alignmask & ~(crypto_tfm_ctx_alignment() - 1)); - buffer = kmalloc(absize, GFP_KERNEL); + buffer = kmalloc(absize, GFP_ATOMIC); if (!buffer) return -ENOMEM; From f2adb1f6f9d691f8d518b6b44ea20618e636ffee Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 2 Oct 2017 19:31:24 +0100 Subject: [PATCH 175/189] ahci: don't ignore result code of ahci_reset_controller() [ Upstream commit d312fefea8387503375f728855c9a62de20c9665 ] ahci_pci_reset_controller() calls ahci_reset_controller(), which may fail, but ignores the result code and always returns success. This may result in failures like below ahci 0000:02:00.0: version 3.0 ahci 0000:02:00.0: enabling device (0000 -> 0003) ahci 0000:02:00.0: SSS flag set, parallel bus scan disabled ahci 0000:02:00.0: controller reset failed (0xffffffff) ahci 0000:02:00.0: failed to stop engine (-5) ... repeated many times ... ahci 0000:02:00.0: failed to stop engine (-5) Unable to handle kernel paging request at virtual address ffff0000093f9018 ... PC is at ahci_stop_engine+0x5c/0xd8 [libahci] LR is at ahci_deinit_port.constprop.12+0x1c/0xc0 [libahci] ... [] ahci_stop_engine+0x5c/0xd8 [libahci] [] ahci_deinit_port.constprop.12+0x1c/0xc0 [libahci] [] ahci_init_controller+0x80/0x168 [libahci] [] ahci_pci_init_controller+0x60/0x68 [ahci] [] ahci_init_one+0x75c/0xd88 [ahci] [] local_pci_probe+0x3c/0xb8 [] pci_device_probe+0x138/0x170 [] driver_probe_device+0x2dc/0x458 [] __driver_attach+0x114/0x118 [] bus_for_each_dev+0x60/0xa0 [] driver_attach+0x20/0x28 [] bus_add_driver+0x1f0/0x2a8 [] driver_register+0x60/0xf8 [] __pci_register_driver+0x3c/0x48 [] ahci_pci_driver_init+0x1c/0x1000 [ahci] [] do_one_initcall+0x38/0x120 where an obvious hardware level failure results in an unnecessary 15 second delay and a subsequent crash. So record the result code of ahci_reset_controller() and relay it, rather than ignoring it. Signed-off-by: Ard Biesheuvel Signed-off-by: Tejun Heo Signed-off-by: Sasha Levin --- drivers/ata/ahci.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 34fdaa6e99ba..5f1f049063dd 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -619,8 +619,11 @@ static void ahci_pci_save_initial_config(struct pci_dev *pdev, static int ahci_pci_reset_controller(struct ata_host *host) { struct pci_dev *pdev = to_pci_dev(host->dev); + int rc; - ahci_reset_controller(host); + rc = ahci_reset_controller(host); + if (rc) + return rc; if (pdev->vendor == PCI_VENDOR_ID_INTEL) { struct ahci_host_priv *hpriv = host->private_data; From d1ce094c3c90434844c6580be7290517762422d6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 17 Oct 2018 15:23:26 +0100 Subject: [PATCH 176/189] cachefiles: fix the race between cachefiles_bury_object() and rmdir(2) commit 169b803397499be85bdd1e3d07d6f5e3d4bd669e upstream. the victim might've been rmdir'ed just before the lock_rename(); unlike the normal callers, we do not look the source up after the parents are locked - we know it beforehand and just recheck that it's still the child of what used to be its parent. Unfortunately, the check is too weak - we don't spot a dead directory since its ->d_parent is unchanged, dentry is positive, etc. So we sail all the way to ->rename(), with hosting filesystems _not_ expecting to be asked renaming an rmdir'ed subdirectory. The fix is easy, fortunately - the lock on parent is sufficient for making IS_DEADDIR() on child safe. Cc: stable@vger.kernel.org Fixes: 9ae326a69004 (CacheFiles: A cache that backs onto a mounted filesystem) Signed-off-by: Al Viro Signed-off-by: David Howells Signed-off-by: Greg Kroah-Hartman --- fs/cachefiles/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index c43b4b08546b..a5f59eed8287 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -317,7 +317,7 @@ try_again: trap = lock_rename(cache->graveyard, dir); /* do some checks before getting the grave dentry */ - if (rep->d_parent != dir) { + if (rep->d_parent != dir || IS_DEADDIR(d_inode(rep))) { /* the entry was probably culled when we dropped the parent dir * lock */ unlock_rename(cache->graveyard, dir); From 3700bfc36c3cef29533951d31b37b041628bb5b2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 16 Oct 2018 15:06:41 +0200 Subject: [PATCH 177/189] ptp: fix Spectre v1 vulnerability commit efa61c8cf2950ab5c0e66cff3cabe2a2b24e81ba upstream. pin_index can be indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: drivers/ptp/ptp_chardev.c:253 ptp_ioctl() warn: potential spectre issue 'ops->pin_config' [r] (local cap) Fix this by sanitizing pin_index before using it to index ops->pin_config, and before passing it as an argument to function ptp_set_pinfunc(), in which it is used to index info->pin_config. Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Acked-by: Richard Cochran Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/ptp/ptp_chardev.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index d877ff124365..4eb254a273f8 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -23,6 +23,8 @@ #include #include +#include + #include "ptp_private.h" static int ptp_disable_pinfunc(struct ptp_clock_info *ops, @@ -224,6 +226,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) err = -EINVAL; break; } + pin_index = array_index_nospec(pin_index, ops->n_pins); if (mutex_lock_interruptible(&ptp->pincfg_mux)) return -ERESTARTSYS; pd = ops->pin_config[pin_index]; @@ -242,6 +245,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) err = -EINVAL; break; } + pin_index = array_index_nospec(pin_index, ops->n_pins); if (mutex_lock_interruptible(&ptp->pincfg_mux)) return -ERESTARTSYS; err = ptp_set_pinfunc(ptp, pin_index, pd.func, pd.chan); From 6ede39a8e1f64babac7e50e989f8b6535e7e7dde Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 16 Oct 2018 16:59:01 +0200 Subject: [PATCH 178/189] RDMA/ucma: Fix Spectre v1 vulnerability commit a3671a4f973ee9d9621d60166cc3b037c397d604 upstream. hdr.cmd can be indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: drivers/infiniband/core/ucma.c:1686 ucma_write() warn: potential spectre issue 'ucma_cmd_table' [r] (local cap) Fix this by sanitizing hdr.cmd before using it to index ucm_cmd_table. Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/ucma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 7525e9f6949e..3e4d3d5560bf 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -44,6 +44,8 @@ #include #include +#include + #include #include #include @@ -1627,6 +1629,7 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf, if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table)) return -EINVAL; + hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucma_cmd_table)); if (hdr.in + sizeof(hdr) > len) return -EINVAL; From 17eb02cc26926b879015590a86380e431048057e Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 16 Oct 2018 16:32:40 +0200 Subject: [PATCH 179/189] IB/ucm: Fix Spectre v1 vulnerability commit 0295e39595e1146522f2722715dba7f7fba42217 upstream. hdr.cmd can be indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: drivers/infiniband/core/ucm.c:1127 ib_ucm_write() warn: potential spectre issue 'ucm_cmd_table' [r] (local cap) Fix this by sanitizing hdr.cmd before using it to index ucm_cmd_table. Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/ucm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 564adf3116e8..4b3a00855f52 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -46,6 +46,8 @@ #include #include +#include + #include #include @@ -1115,6 +1117,7 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf, if (hdr.cmd >= ARRAY_SIZE(ucm_cmd_table)) return -EINVAL; + hdr.cmd = array_index_nospec(hdr.cmd, ARRAY_SIZE(ucm_cmd_table)); if (hdr.in + sizeof(hdr) > len) return -EINVAL; From 17275e092a0126211eec3114d64c639feee7cacd Mon Sep 17 00:00:00 2001 From: Tobias Herzog Date: Sat, 22 Sep 2018 22:11:11 +0200 Subject: [PATCH 180/189] cdc-acm: correct counting of UART states in serial state notification commit f976d0e5747ca65ccd0fb2a4118b193d70aa1836 upstream. The usb standard ("Universal Serial Bus Class Definitions for Communication Devices") distiguishes between "consistent signals" (DSR, DCD), and "irregular signals" (break, ring, parity error, framing error, overrun). The bits of "irregular signals" are set, if this error/event occurred on the device side and are immeadeatly unset, if the serial state notification was sent. Like other drivers of real serial ports do, just the occurence of those events should be counted in serial_icounter_struct (but no 1->0 transitions). Signed-off-by: Tobias Herzog Acked-by: Oliver Neukum Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index a501f3ba6a3f..3cbf6aa10f2c 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -332,17 +332,17 @@ static void acm_ctrl_irq(struct urb *urb) if (difference & ACM_CTRL_DSR) acm->iocount.dsr++; - if (difference & ACM_CTRL_BRK) - acm->iocount.brk++; - if (difference & ACM_CTRL_RI) - acm->iocount.rng++; if (difference & ACM_CTRL_DCD) acm->iocount.dcd++; - if (difference & ACM_CTRL_FRAMING) + if (newctrl & ACM_CTRL_BRK) + acm->iocount.brk++; + if (newctrl & ACM_CTRL_RI) + acm->iocount.rng++; + if (newctrl & ACM_CTRL_FRAMING) acm->iocount.frame++; - if (difference & ACM_CTRL_PARITY) + if (newctrl & ACM_CTRL_PARITY) acm->iocount.parity++; - if (difference & ACM_CTRL_OVERRUN) + if (newctrl & ACM_CTRL_OVERRUN) acm->iocount.overrun++; spin_unlock(&acm->read_lock); From 87f8db65811b5bdb7686d0e53fab486d9fc2fff8 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 16 Oct 2018 12:16:45 +0200 Subject: [PATCH 181/189] usb: gadget: storage: Fix Spectre v1 vulnerability commit 9ae24af3669111d418242caec8dd4ebd9ba26860 upstream. num can be indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: drivers/usb/gadget/function/f_mass_storage.c:3177 fsg_lun_make() warn: potential spectre issue 'fsg_opts->common->luns' [r] (local cap) Fix this by sanitizing num before using it to index fsg_opts->common->luns Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Acked-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_mass_storage.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index 4dd3c7672247..25488c89308a 100644 --- a/drivers/usb/gadget/function/f_mass_storage.c +++ b/drivers/usb/gadget/function/f_mass_storage.c @@ -220,6 +220,8 @@ #include #include +#include + #include "configfs.h" @@ -3260,6 +3262,7 @@ static struct config_group *fsg_lun_make(struct config_group *group, fsg_opts = to_fsg_opts(&group->cg_item); if (num >= FSG_MAX_LUNS) return ERR_PTR(-ERANGE); + num = array_index_nospec(num, FSG_MAX_LUNS); mutex_lock(&fsg_opts->lock); if (fsg_opts->refcnt || fsg_opts->common->luns[num]) { From 506617d92fc31de7e1a769da660924d42097210f Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 15 Oct 2018 16:55:04 -0400 Subject: [PATCH 182/189] USB: fix the usbfs flag sanitization for control transfers commit 665c365a77fbfeabe52694aedf3446d5f2f1ce42 upstream. Commit 7a68d9fb8510 ("USB: usbdevfs: sanitize flags more") checks the transfer flags for URBs submitted from userspace via usbfs. However, the check for whether the USBDEVFS_URB_SHORT_NOT_OK flag should be allowed for a control transfer was added in the wrong place, before the code has properly determined the direction of the control transfer. (Control transfers are special because for them, the direction is set by the bRequestType byte of the Setup packet rather than direction bit of the endpoint address.) This patch moves code which sets up the allow_short flag for control transfers down after is_in has been set to the correct value. Signed-off-by: Alan Stern Reported-and-tested-by: syzbot+24a30223a4b609bb802e@syzkaller.appspotmail.com Fixes: 7a68d9fb8510 ("USB: usbdevfs: sanitize flags more") CC: Oliver Neukum CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/devio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 5e0af15aebc4..7559d96695da 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1329,8 +1329,6 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb u = 0; switch (uurb->type) { case USBDEVFS_URB_TYPE_CONTROL: - if (is_in) - allow_short = true; if (!usb_endpoint_xfer_control(&ep->desc)) return -EINVAL; /* min 8 byte setup packet */ @@ -1360,6 +1358,8 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb is_in = 0; uurb->endpoint &= ~USB_DIR_IN; } + if (is_in) + allow_short = true; snoop(&ps->dev->dev, "control urb: bRequestType=%02x " "bRequest=%02x wValue=%04x " "wIndex=%04x wLength=%04x\n", From c057f7581524a8d845e6a102e1b75faa0dac2bd6 Mon Sep 17 00:00:00 2001 From: Mikhail Nikiforov Date: Mon, 15 Oct 2018 11:17:56 -0700 Subject: [PATCH 183/189] Input: elan_i2c - add ACPI ID for Lenovo IdeaPad 330-15IGM commit 13c1c5e4d7f887cba36c5e3df3faa22071c1469f upstream. Add ELAN061C to the ACPI table to support Elan touchpad found in Lenovo IdeaPad 330-15IGM. Signed-off-by: Mikhail Nikiforov Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/mouse/elan_i2c_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index a716482774db..b3119589a444 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1251,6 +1251,7 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0611", 0 }, { "ELAN0612", 0 }, { "ELAN0618", 0 }, + { "ELAN061C", 0 }, { "ELAN061D", 0 }, { "ELAN0622", 0 }, { "ELAN1000", 0 }, From 4a2b54a750bf18be02997944464396485b7ef284 Mon Sep 17 00:00:00 2001 From: Phil Auld Date: Mon, 8 Oct 2018 10:36:40 -0400 Subject: [PATCH 184/189] sched/fair: Fix throttle_list starvation with low CFS quota commit baa9be4ffb55876923dc9716abc0a448e510ba30 upstream. With a very low cpu.cfs_quota_us setting, such as the minimum of 1000, distribute_cfs_runtime may not empty the throttled_list before it runs out of runtime to distribute. In that case, due to the change from c06f04c7048 to put throttled entries at the head of the list, later entries on the list will starve. Essentially, the same X processes will get pulled off the list, given CPU time and then, when expired, get put back on the head of the list where distribute_cfs_runtime will give runtime to the same set of processes leaving the rest. Fix the issue by setting a bit in struct cfs_bandwidth when distribute_cfs_runtime is running, so that the code in throttle_cfs_rq can decide to put the throttled entry on the tail or the head of the list. The bit is set/cleared by the callers of distribute_cfs_runtime while they hold cfs_bandwidth->lock. This is easy to reproduce with a handful of CPU consumers. I use 'crash' on the live system. In some cases you can simply look at the throttled list and see the later entries are not changing: crash> list cfs_rq.throttled_list -H 0xffff90b54f6ade40 -s cfs_rq.runtime_remaining | paste - - | awk '{print $1" "$4}' | pr -t -n3 1 ffff90b56cb2d200 -976050 2 ffff90b56cb2cc00 -484925 3 ffff90b56cb2bc00 -658814 4 ffff90b56cb2ba00 -275365 5 ffff90b166a45600 -135138 6 ffff90b56cb2da00 -282505 7 ffff90b56cb2e000 -148065 8 ffff90b56cb2fa00 -872591 9 ffff90b56cb2c000 -84687 10 ffff90b56cb2f000 -87237 11 ffff90b166a40a00 -164582 crash> list cfs_rq.throttled_list -H 0xffff90b54f6ade40 -s cfs_rq.runtime_remaining | paste - - | awk '{print $1" "$4}' | pr -t -n3 1 ffff90b56cb2d200 -994147 2 ffff90b56cb2cc00 -306051 3 ffff90b56cb2bc00 -961321 4 ffff90b56cb2ba00 -24490 5 ffff90b166a45600 -135138 6 ffff90b56cb2da00 -282505 7 ffff90b56cb2e000 -148065 8 ffff90b56cb2fa00 -872591 9 ffff90b56cb2c000 -84687 10 ffff90b56cb2f000 -87237 11 ffff90b166a40a00 -164582 Sometimes it is easier to see by finding a process getting starved and looking at the sched_info: crash> task ffff8eb765994500 sched_info PID: 7800 TASK: ffff8eb765994500 CPU: 16 COMMAND: "cputest" sched_info = { pcount = 8, run_delay = 697094208, last_arrival = 240260125039, last_queued = 240260327513 }, crash> task ffff8eb765994500 sched_info PID: 7800 TASK: ffff8eb765994500 CPU: 16 COMMAND: "cputest" sched_info = { pcount = 8, run_delay = 697094208, last_arrival = 240260125039, last_queued = 240260327513 }, Signed-off-by: Phil Auld Reviewed-by: Ben Segall Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: c06f04c70489 ("sched: Fix potential near-infinite distribute_cfs_runtime() loop") Link: http://lkml.kernel.org/r/20181008143639.GA4019@pauld.bos.csb Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/sched/fair.c | 22 +++++++++++++++++++--- kernel/sched/sched.h | 2 ++ 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index a0c5bb93a3ab..c2af250547bb 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3624,9 +3624,13 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq) /* * Add to the _head_ of the list, so that an already-started - * distribute_cfs_runtime will not see us + * distribute_cfs_runtime will not see us. If disribute_cfs_runtime is + * not running add to the tail so that later runqueues don't get starved. */ - list_add_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq); + if (cfs_b->distribute_running) + list_add_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq); + else + list_add_tail_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq); /* * If we're the first throttled task, make sure the bandwidth @@ -3769,14 +3773,16 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun) * in us over-using our runtime if it is all used during this loop, but * only by limited amounts in that extreme case. */ - while (throttled && cfs_b->runtime > 0) { + while (throttled && cfs_b->runtime > 0 && !cfs_b->distribute_running) { runtime = cfs_b->runtime; + cfs_b->distribute_running = 1; raw_spin_unlock(&cfs_b->lock); /* we can't nest cfs_b->lock while distributing bandwidth */ runtime = distribute_cfs_runtime(cfs_b, runtime, runtime_expires); raw_spin_lock(&cfs_b->lock); + cfs_b->distribute_running = 0; throttled = !list_empty(&cfs_b->throttled_cfs_rq); cfs_b->runtime -= min(runtime, cfs_b->runtime); @@ -3887,6 +3893,11 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) /* confirm we're still not at a refresh boundary */ raw_spin_lock(&cfs_b->lock); + if (cfs_b->distribute_running) { + raw_spin_unlock(&cfs_b->lock); + return; + } + if (runtime_refresh_within(cfs_b, min_bandwidth_expiration)) { raw_spin_unlock(&cfs_b->lock); return; @@ -3896,6 +3907,9 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) runtime = cfs_b->runtime; expires = cfs_b->runtime_expires; + if (runtime) + cfs_b->distribute_running = 1; + raw_spin_unlock(&cfs_b->lock); if (!runtime) @@ -3906,6 +3920,7 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) raw_spin_lock(&cfs_b->lock); if (expires == cfs_b->runtime_expires) cfs_b->runtime -= min(runtime, cfs_b->runtime); + cfs_b->distribute_running = 0; raw_spin_unlock(&cfs_b->lock); } @@ -4017,6 +4032,7 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) cfs_b->period_timer.function = sched_cfs_period_timer; hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); cfs_b->slack_timer.function = sched_cfs_slack_timer; + cfs_b->distribute_running = 0; } static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index af8d8c3eb8ab..6893ee31df4d 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -233,6 +233,8 @@ struct cfs_bandwidth { /* statistics */ int nr_periods, nr_throttled; u64 throttled_time; + + bool distribute_running; #endif }; From 74ede0af32f565ac5b7222c8722cc7da6951bef4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Oct 2018 12:38:27 +0200 Subject: [PATCH 185/189] x86/percpu: Fix this_cpu_read() commit b59167ac7bafd804c91e49ad53c6d33a7394d4c8 upstream. Eric reported that a sequence count loop using this_cpu_read() got optimized out. This is wrong, this_cpu_read() must imply READ_ONCE() because the interface is IRQ-safe, therefore an interrupt can have changed the per-cpu value. Fixes: 7c3576d261ce ("[PATCH] i386: Convert PDA into the percpu section") Reported-by: Eric Dumazet Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Acked-by: Eric Dumazet Cc: hpa@zytor.com Cc: eric.dumazet@gmail.com Cc: bp@alien8.de Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181011104019.748208519@infradead.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/percpu.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index e0ba66ca68c6..f5e780bfa2b3 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -184,22 +184,22 @@ do { \ typeof(var) pfo_ret__; \ switch (sizeof(var)) { \ case 1: \ - asm(op "b "__percpu_arg(1)",%0" \ + asm volatile(op "b "__percpu_arg(1)",%0"\ : "=q" (pfo_ret__) \ : "m" (var)); \ break; \ case 2: \ - asm(op "w "__percpu_arg(1)",%0" \ + asm volatile(op "w "__percpu_arg(1)",%0"\ : "=r" (pfo_ret__) \ : "m" (var)); \ break; \ case 4: \ - asm(op "l "__percpu_arg(1)",%0" \ + asm volatile(op "l "__percpu_arg(1)",%0"\ : "=r" (pfo_ret__) \ : "m" (var)); \ break; \ case 8: \ - asm(op "q "__percpu_arg(1)",%0" \ + asm volatile(op "q "__percpu_arg(1)",%0"\ : "=r" (pfo_ret__) \ : "m" (var)); \ break; \ From 8a1d3de19b0afbf069baf704afecfbeba9059fce Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 1 Jun 2016 18:52:16 +0100 Subject: [PATCH 186/189] cpuidle: Do not access cpuidle_devices when !CONFIG_CPU_IDLE commit 9bd616e3dbedfc103f158197c8ad93678849b1ed upstream. The cpuidle_devices per-CPU variable is only defined when CPU_IDLE is enabled. Commit c8cc7d4de7a4 ("sched/idle: Reorganize the idle loop") removed the #ifdef CONFIG_CPU_IDLE around cpuidle_idle_call() with the compiler optimising away __this_cpu_read(cpuidle_devices). However, with CONFIG_UBSAN && !CONFIG_CPU_IDLE, this optimisation no longer happens and the kernel fails to link since cpuidle_devices is not defined. This patch introduces an accessor function for the current CPU cpuidle device (returning NULL when !CONFIG_CPU_IDLE) and uses it in cpuidle_idle_call(). Signed-off-by: Catalin Marinas Cc: 4.5+ # 4.5+ Signed-off-by: Rafael J. Wysocki Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- include/linux/cpuidle.h | 3 +++ kernel/sched/idle.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 786ad32631a6..07b83d32f66c 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -152,6 +152,8 @@ extern void cpuidle_disable_device(struct cpuidle_device *dev); extern int cpuidle_play_dead(void); extern struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev); +static inline struct cpuidle_device *cpuidle_get_device(void) +{return __this_cpu_read(cpuidle_devices); } #else static inline void disable_cpuidle(void) { } static inline bool cpuidle_not_available(struct cpuidle_driver *drv, @@ -187,6 +189,7 @@ static inline void cpuidle_disable_device(struct cpuidle_device *dev) { } static inline int cpuidle_play_dead(void) {return -ENODEV; } static inline struct cpuidle_driver *cpuidle_get_cpu_driver( struct cpuidle_device *dev) {return NULL; } +static inline struct cpuidle_device *cpuidle_get_device(void) {return NULL; } #endif #if defined(CONFIG_CPU_IDLE) && defined(CONFIG_SUSPEND) diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 4a2ef5a02fd3..bfd573122e0d 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -132,7 +132,7 @@ static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev, */ static void cpuidle_idle_call(void) { - struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); + struct cpuidle_device *dev = cpuidle_get_device(); struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); int next_state, entered_state; From 80ab1e24e2c03585fe4883aaf591cfe07da96edd Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 29 Mar 2017 08:44:59 +0200 Subject: [PATCH 187/189] l2tp: hold tunnel socket when handling control frames in l2tp_ip and l2tp_ip6 commit 94d7ee0baa8b764cf64ad91ed69464c1a6a0066b upstream. The code following l2tp_tunnel_find() expects that a new reference is held on sk. Either sk_receive_skb() or the discard_put error path will drop a reference from the tunnel's socket. This issue exists in both l2tp_ip and l2tp_ip6. Fixes: a3c18422a4b4 ("l2tp: hold socket before dropping lock in l2tp_ip{, 6}_recv()") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/l2tp/l2tp_ip.c | 5 +++-- net/l2tp/l2tp_ip6.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index c7e6098c924e..af74e3ba0f92 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -177,9 +177,10 @@ pass_up: tunnel_id = ntohl(*(__be32 *) &skb->data[4]); tunnel = l2tp_tunnel_find(net, tunnel_id); - if (tunnel != NULL) + if (tunnel) { sk = tunnel->sock; - else { + sock_hold(sk); + } else { struct iphdr *iph = (struct iphdr *) skb_network_header(skb); read_lock_bh(&l2tp_ip_lock); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 5fe0a6f6af3d..591d308bf63a 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -188,9 +188,10 @@ pass_up: tunnel_id = ntohl(*(__be32 *) &skb->data[4]); tunnel = l2tp_tunnel_find(&init_net, tunnel_id); - if (tunnel != NULL) + if (tunnel) { sk = tunnel->sock; - else { + sock_hold(sk); + } else { struct ipv6hdr *iph = ipv6_hdr(skb); read_lock_bh(&l2tp_ip6_lock); From 8474c9b8cc8b7f29c719d26f4fbca5cb957634fc Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 12 Oct 2018 17:53:12 -0700 Subject: [PATCH 188/189] x86/time: Correct the attribute on jiffies' definition commit 53c13ba8ed39e89f21a0b98f4c8a241bb44e483d upstream. Clang warns that the declaration of jiffies in include/linux/jiffies.h doesn't match the definition in arch/x86/time/kernel.c: arch/x86/kernel/time.c:29:42: warning: section does not match previous declaration [-Wsection] __visible volatile unsigned long jiffies __cacheline_aligned = INITIAL_JIFFIES; ^ ./include/linux/cache.h:49:4: note: expanded from macro '__cacheline_aligned' __section__(".data..cacheline_aligned"))) ^ ./include/linux/jiffies.h:81:31: note: previous attribute is here extern unsigned long volatile __cacheline_aligned_in_smp __jiffy_arch_data jiffies; ^ ./arch/x86/include/asm/cache.h:20:2: note: expanded from macro '__cacheline_aligned_in_smp' __page_aligned_data ^ ./include/linux/linkage.h:39:29: note: expanded from macro '__page_aligned_data' #define __page_aligned_data __section(.data..page_aligned) __aligned(PAGE_SIZE) ^ ./include/linux/compiler_attributes.h:233:56: note: expanded from macro '__section' #define __section(S) __attribute__((__section__(#S))) ^ 1 warning generated. The declaration was changed in commit 7c30f352c852 ("jiffies.h: declare jiffies and jiffies_64 with ____cacheline_aligned_in_smp") but wasn't updated here. Make them match so Clang no longer warns. Fixes: 7c30f352c852 ("jiffies.h: declare jiffies and jiffies_64 with ____cacheline_aligned_in_smp") Signed-off-by: Nathan Chancellor Signed-off-by: Thomas Gleixner Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Nick Desaulniers Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181013005311.28617-1-natechancellor@gmail.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index d39c09119db6..590c8fd2ed9b 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -23,7 +23,7 @@ #include #ifdef CONFIG_X86_64 -__visible volatile unsigned long jiffies __cacheline_aligned = INITIAL_JIFFIES; +__visible volatile unsigned long jiffies __cacheline_aligned_in_smp = INITIAL_JIFFIES; #endif unsigned long profile_pc(struct pt_regs *regs) From 7a4269707deb6ab22d488eb1a9eedae3ef88abc5 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 10 Nov 2018 07:41:44 -0800 Subject: [PATCH 189/189] Linux 4.4.163 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 00ff2dd68ff1..4e3179768eea 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 162 +SUBLEVEL = 163 EXTRAVERSION = NAME = Blurry Fish Butt