Merge: Performance improvements.

This patchset brings some performance improvements and the addition of the LZO-RLE
algorithm to the kernel, also usable in zram (yup, tested, works but LZ4 is still ok for us).

The main performance improvement is for SWAP space: the locking has changed and
the swap cache is now split in 64MB trunks.
This reduces the median page fault latency from 15uS to 4uS (roughly a 3.75x speedup),
and improves the swap throughput by 192% (this includes "virtual" swap
devices, like zRAM!). The real world user experience improvement of this on a mobile
device becomes apparent after a day or two of usage, when performance would normally
degrade slightly due to the large number of apps kept open in the background: with these
changes no performance loss is noticeable, and the user experience remains basically the
same as in the first couple of hours after boot.

Other performance improvements include, in short:

    UDP v4/v6: 10% more performance on single RX queue
    Userspace applications will be faster when checking running time of threads
    2-5% improvements on heavy multipliers (yeah, not a lot, but was totally free...)
    Improvements during sparse truncate: about 0.3% in the common case, and up to
    around 20% in much rarer conditions (unlikely to be hit in practice, but there
    is no performance drop anywhere).

Tested on SoMC Tama Akatsuki RoW

This was taken from
Repo:
https://github.com/sonyxperiadev/kernel
PR: 2039 ([2.3.2.r1.4] Performance improvements)
This commit is contained in:
Angelo G. Del Regno
2019-08-06 18:33:22 +02:00
committed by DhineshCool
parent 2e6317f80a
commit 1ad6cc0c62
23 changed files with 1338 additions and 198 deletions

View File

@@ -73,6 +73,10 @@ Description
They just have to "refill" this credit if they consume extra bytes. This is
an implementation design choice independent on the algorithm or encoding.
For maximum compatibility, both versions are available under different names
(lzo and lzo-rle). Differences in the encoding are noted in this document with
e.g.: version 1 only.
Byte sequences
First byte encoding :
@@ -134,6 +138,11 @@ Byte sequences
state = S (copy S literals after this block)
End of stream is reached if distance == 16384
In version 1 only, this instruction is also used to encode a run of
zeros if distance = 0xbfff, i.e. H = 1 and the D bits are all 1.
In this case, it is followed by a fourth byte, X.
run length = ((X << 3) | (0 0 0 0 0 L L L)) + 4.
0 0 1 L L L L L (32..63)
Copy of small block within 16kB distance (preferably less than 34B)
length = 2 + (L ?: 31 + (zero_bytes * 255) + non_zero_byte)
@@ -158,7 +167,9 @@ Byte sequences
Authors
This document was written by Willy Tarreau <w@1wt.eu> on 2014/07/19 during an
analysis of the decompression code available in Linux 3.16-rc5. The code is
tricky, it is possible that this document contains mistakes or that a few
corner cases were overlooked. In any case, please report any doubt, fix, or
proposed updates to the author(s) so that the document can be updated.
analysis of the decompression code available in Linux 3.16-rc5, and updated
by Dave Rodgman <dave.rodgman@arm.com> on 2018/10/30 to introduce run-length
encoding. The code is tricky, it is possible that this document contains
mistakes or that a few corner cases were overlooked. In any case, please
report any doubt, fix, or proposed updates to the author(s) so that the
document can be updated.

View File

@@ -9,6 +9,7 @@ config ARM64
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FAST_MULTIPLIER
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_GIGANTIC_PAGE

View File

@@ -25,6 +25,7 @@
#include <asm/virt.h>
#include <linux/acpi.h>
#include <linux/clocksource.h>
#include <linux/of.h>
#include <linux/perf/arm_pmu.h>
#include <linux/platform_device.h>
@@ -1233,3 +1234,32 @@ static struct platform_driver armv8_pmu_driver = {
};
builtin_platform_driver(armv8_pmu_driver);
/*
 * Publish sched_clock conversion parameters in the perf mmap page so
 * userspace can turn raw cycle counts into nanoseconds without a syscall.
 */
void arch_perf_update_userpage(struct perf_event *event,
			       struct perf_event_mmap_page *userpg, u64 now)
{
	u32 freq;
	u32 shift;

	/*
	 * Internal timekeeping for enabled/running/stopped times
	 * is always computed with the sched_clock.
	 */
	freq = arch_timer_get_rate();
	userpg->cap_user_time = 1;

	clocks_calc_mult_shift(&userpg->time_mult, &shift, freq,
			NSEC_PER_SEC, 0);
	/*
	 * time_shift is not expected to be greater than 31 due to
	 * the original published conversion algorithm shifting a
	 * 32-bit value (now specifies a 64-bit value) - refer
	 * perf_event_mmap_page documentation in perf_event.h.
	 */
	if (shift == 32) {
		shift = 31;
		/* halve the multiplier to compensate for the reduced shift */
		userpg->time_mult >>= 1;
	}
	userpg->time_shift = (u16)shift;
	userpg->time_offset = -now;
}

View File

@@ -121,7 +121,7 @@ obj-$(CONFIG_CRYPTO_CRC32C) += crc32c_generic.o
obj-$(CONFIG_CRYPTO_CRC32) += crc32_generic.o
obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif_common.o crct10dif_generic.o
obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o
obj-$(CONFIG_CRYPTO_LZO) += lzo.o
obj-$(CONFIG_CRYPTO_LZO) += lzo.o lzo-rle.o
obj-$(CONFIG_CRYPTO_LZ4) += lz4.o
obj-$(CONFIG_CRYPTO_LZ4HC) += lz4hc.o
obj-$(CONFIG_CRYPTO_842) += 842.o

175
crypto/lzo-rle.c Normal file
View File

@@ -0,0 +1,175 @@
/*
* Cryptographic API.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 51
* Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include <linux/init.h>
#include <linux/module.h>
#include <linux/crypto.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/lzo.h>
#include <crypto/internal/scompress.h>
/* Per-tfm context: scratch memory for the LZO1X compression dictionary. */
struct lzorle_ctx {
	void *lzorle_comp_mem;	/* LZO1X_MEM_COMPRESS bytes, kvmalloc'd */
};
/*
 * Allocate the compression workspace for one transform.
 * Returns the workspace pointer, or ERR_PTR(-ENOMEM) on failure.
 */
static void *lzorle_alloc_ctx(struct crypto_scomp *tfm)
{
	void *workspace = kvmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);

	return workspace ? workspace : ERR_PTR(-ENOMEM);
}
/* crypto_tfm init hook: set up the per-tfm compression workspace. */
static int lzorle_init(struct crypto_tfm *tfm)
{
	struct lzorle_ctx *ctx = crypto_tfm_ctx(tfm);
	void *mem = lzorle_alloc_ctx(NULL);

	ctx->lzorle_comp_mem = mem;

	return IS_ERR(mem) ? -ENOMEM : 0;
}
/* scomp free_ctx hook: release a workspace from lzorle_alloc_ctx(). */
static void lzorle_free_ctx(struct crypto_scomp *tfm, void *ctx)
{
	kvfree(ctx);
}

/* crypto_tfm exit hook: release the workspace allocated in lzorle_init(). */
static void lzorle_exit(struct crypto_tfm *tfm)
{
	struct lzorle_ctx *ctx = crypto_tfm_ctx(tfm);

	lzorle_free_ctx(NULL, ctx->lzorle_comp_mem);
}
/*
 * Core compression helper shared by the crypto_comp and scomp paths.
 * Bridges the crypto API's unsigned int lengths to LZO's size_t lengths
 * and maps any LZO failure onto -EINVAL.
 */
static int __lzorle_compress(const u8 *src, unsigned int slen,
			  u8 *dst, unsigned int *dlen, void *ctx)
{
	size_t out_len = *dlen; /* size_t(ulong) <-> uint on 64 bit */

	if (lzorle1x_1_compress(src, slen, dst, &out_len, ctx) != LZO_E_OK)
		return -EINVAL;

	*dlen = out_len;
	return 0;
}
/* crypto_comp .coa_compress: compress using the per-tfm workspace. */
static int lzorle_compress(struct crypto_tfm *tfm, const u8 *src,
			unsigned int slen, u8 *dst, unsigned int *dlen)
{
	struct lzorle_ctx *ctx = crypto_tfm_ctx(tfm);

	return __lzorle_compress(src, slen, dst, dlen, ctx->lzorle_comp_mem);
}

/* scomp .compress: the workspace is supplied by the scomp core via @ctx. */
static int lzorle_scompress(struct crypto_scomp *tfm, const u8 *src,
			unsigned int slen, u8 *dst, unsigned int *dlen,
			void *ctx)
{
	return __lzorle_compress(src, slen, dst, dlen, ctx);
}
/*
 * Core decompression helper shared by both crypto interfaces.
 * Converts length types and maps LZO status codes onto -EINVAL.
 */
static int __lzorle_decompress(const u8 *src, unsigned int slen,
			    u8 *dst, unsigned int *dlen)
{
	size_t out_len = *dlen; /* size_t(ulong) <-> uint on 64 bit */

	if (lzo1x_decompress_safe(src, slen, dst, &out_len) != LZO_E_OK)
		return -EINVAL;

	*dlen = out_len;
	return 0;
}
/* crypto_comp .coa_decompress: decompression needs no workspace. */
static int lzorle_decompress(struct crypto_tfm *tfm, const u8 *src,
			  unsigned int slen, u8 *dst, unsigned int *dlen)
{
	return __lzorle_decompress(src, slen, dst, dlen);
}

/* scomp .decompress: @ctx is unused for decompression. */
static int lzorle_sdecompress(struct crypto_scomp *tfm, const u8 *src,
			   unsigned int slen, u8 *dst, unsigned int *dlen,
			   void *ctx)
{
	return __lzorle_decompress(src, slen, dst, dlen);
}
/* Legacy one-shot crypto_comp interface for "lzo-rle". */
static struct crypto_alg alg = {
	.cra_name		= "lzo-rle",
	.cra_flags		= CRYPTO_ALG_TYPE_COMPRESS,
	.cra_ctxsize		= sizeof(struct lzorle_ctx),
	.cra_module		= THIS_MODULE,
	.cra_init		= lzorle_init,
	.cra_exit		= lzorle_exit,
	.cra_u			= { .compress = {
	.coa_compress		= lzorle_compress,
	.coa_decompress		= lzorle_decompress } }
};

/* Newer synchronous-compression (scomp) interface for "lzo-rle". */
static struct scomp_alg scomp = {
	.alloc_ctx		= lzorle_alloc_ctx,
	.free_ctx		= lzorle_free_ctx,
	.compress		= lzorle_scompress,
	.decompress		= lzorle_sdecompress,
	.base			= {
		.cra_name	= "lzo-rle",
		.cra_driver_name = "lzo-rle-scomp",
		.cra_module	 = THIS_MODULE,
	}
};
/*
 * Register both the legacy crypto_comp algorithm and the scomp variant,
 * rolling back the first registration if the second one fails.
 */
static int __init lzorle_mod_init(void)
{
	int err = crypto_register_alg(&alg);

	if (err)
		return err;

	err = crypto_register_scomp(&scomp);
	if (err) {
		crypto_unregister_alg(&alg);
		return err;
	}

	return 0;
}
/* Unregister both algorithm flavours on module unload. */
static void __exit lzorle_mod_fini(void)
{
	crypto_unregister_alg(&alg);
	crypto_unregister_scomp(&scomp);
}

module_init(lzorle_mod_init);
module_exit(lzorle_mod_fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("LZO-RLE Compression Algorithm");
MODULE_ALIAS_CRYPTO("lzo-rle");

View File

@@ -73,7 +73,8 @@ static char *check[] = {
"cast6", "arc4", "michael_mic", "deflate", "crc32c", "tea", "xtea",
"khazad", "wp512", "wp384", "wp256", "tnepres", "xeta", "fcrypt",
"camellia", "seed", "salsa20", "rmd128", "rmd160", "rmd256", "rmd320",
"lzo", "cts", "zlib", "sha3-224", "sha3-256", "sha3-384", "sha3-512",
"lzo", "lzo-rle", "cts", "zlib", "sha3-224", "sha3-256", "sha3-384",
"sha3-512",
NULL
};

View File

@@ -20,6 +20,7 @@
static const char * const backends[] = {
"lzo",
"lzo-rle",
#if IS_ENABLED(CONFIG_CRYPTO_LZ4)
"lz4",
#endif

View File

@@ -17,12 +17,16 @@
#define LZO1X_1_MEM_COMPRESS (8192 * sizeof(unsigned short))
#define LZO1X_MEM_COMPRESS LZO1X_1_MEM_COMPRESS
#define lzo1x_worst_compress(x) ((x) + ((x) / 16) + 64 + 3)
#define lzo1x_worst_compress(x) ((x) + ((x) / 16) + 64 + 3 + 2)
/* This requires 'wrkmem' of size LZO1X_1_MEM_COMPRESS */
int lzo1x_1_compress(const unsigned char *src, size_t src_len,
unsigned char *dst, size_t *dst_len, void *wrkmem);
/* This requires 'wrkmem' of size LZO1X_1_MEM_COMPRESS */
int lzorle1x_1_compress(const unsigned char *src, size_t src_len,
unsigned char *dst, size_t *dst_len, void *wrkmem);
/* safe decompression with overrun testing */
int lzo1x_decompress_safe(const unsigned char *src, size_t src_len,
unsigned char *dst, size_t *dst_len);

View File

@@ -16,7 +16,8 @@ struct address_space;
struct pagevec {
unsigned char nr;
unsigned long cold;
bool cold;
bool drained;
struct page *pages[PAGEVEC_SIZE];
};
@@ -45,6 +46,7 @@ static inline void pagevec_init(struct pagevec *pvec, int cold)
{
pvec->nr = 0;
pvec->cold = cold;
pvec->drained = false;
}
static inline void pagevec_reinit(struct pagevec *pvec)

View File

@@ -27,6 +27,7 @@ struct bio;
#define SWAP_FLAGS_VALID (SWAP_FLAG_PRIO_MASK | SWAP_FLAG_PREFER | \
SWAP_FLAG_DISCARD | SWAP_FLAG_DISCARD_ONCE | \
SWAP_FLAG_DISCARD_PAGES)
#define SWAP_BATCH 64
static inline int current_is_kswapd(void)
{
@@ -178,6 +179,12 @@ enum {
* protected by swap_info_struct.lock.
*/
struct swap_cluster_info {
spinlock_t lock; /*
* Protect swap_cluster_info fields
* and swap_info_struct->swap_map
* elements correspond to the swap
* cluster
*/
unsigned int data:24;
unsigned int flags:8;
};
@@ -365,8 +372,13 @@ int generic_swapfile_activate(struct swap_info_struct *, struct file *,
sector_t *);
/* linux/mm/swap_state.c */
extern struct address_space swapper_spaces[];
#define swap_address_space(entry) (&swapper_spaces[swp_type(entry)])
/* One swap address space for each 64M swap space */
#define SWAP_ADDRESS_SPACE_SHIFT 14
#define SWAP_ADDRESS_SPACE_PAGES (1 << SWAP_ADDRESS_SPACE_SHIFT)
extern struct address_space *swapper_spaces[];
#define swap_address_space(entry) \
(&swapper_spaces[swp_type(entry)][swp_offset(entry) \
>> SWAP_ADDRESS_SPACE_SHIFT])
extern unsigned long total_swapcache_pages(void);
extern void show_swap_cache_info(void);
extern int add_to_swap(struct page *, struct list_head *list);
@@ -389,6 +401,7 @@ extern struct page *swapin_readahead(swp_entry_t, gfp_t,
extern atomic_long_t nr_swap_pages;
extern long total_swap_pages;
extern bool is_swap_fast(swp_entry_t entry);
extern bool has_usable_swap(void);
/* Swap 50% full? Release swapcache more aggressively.. */
static inline bool vm_swap_full(struct swap_info_struct *si)
@@ -411,23 +424,31 @@ static inline long get_nr_swap_pages(void)
extern void si_swapinfo(struct sysinfo *);
extern swp_entry_t get_swap_page(void);
extern swp_entry_t get_swap_page_of_type(int);
extern int get_swap_pages(int n, swp_entry_t swp_entries[]);
extern int add_swap_count_continuation(swp_entry_t, gfp_t);
extern void swap_shmem_alloc(swp_entry_t);
extern int swap_duplicate(swp_entry_t);
extern int swapcache_prepare(swp_entry_t);
extern void swap_free(swp_entry_t);
extern void swapcache_free(swp_entry_t);
extern void swapcache_free_entries(swp_entry_t *entries, int n);
extern int free_swap_and_cache(swp_entry_t);
extern int swap_type_of(dev_t, sector_t, struct block_device **);
extern unsigned int count_swap_pages(int, int);
extern sector_t map_swap_page(struct page *, struct block_device **);
extern sector_t swapdev_block(int, pgoff_t);
extern int page_swapcount(struct page *);
extern int __swp_swapcount(swp_entry_t entry);
extern int swp_swapcount(swp_entry_t entry);
extern struct swap_info_struct *page_swap_info(struct page *);
extern bool reuse_swap_page(struct page *, int *);
extern int try_to_free_swap(struct page *);
struct backing_dev_info;
extern int init_swap_address_space(unsigned int type, unsigned long nr_pages);
extern void exit_swap_address_space(unsigned int type);
extern int get_swap_slots(int n, swp_entry_t *slots);
extern void swapcache_free_batch(swp_entry_t *entries, int n);
#else /* CONFIG_SWAP */
@@ -515,6 +536,11 @@ static inline int page_swapcount(struct page *page)
return 0;
}
static inline int __swp_swapcount(swp_entry_t entry)
{
return 0;
}
static inline int swp_swapcount(swp_entry_t entry)
{
return 0;

View File

@@ -0,0 +1,30 @@
#ifndef _LINUX_SWAP_SLOTS_H
#define _LINUX_SWAP_SLOTS_H

#include <linux/swap.h>
#include <linux/spinlock.h>
#include <linux/mutex.h>

/* Number of swap slots cached per cpu on each side (alloc and free). */
#define SWAP_SLOTS_CACHE_SIZE			SWAP_BATCH
/* Reactivate caching once free swap exceeds this many slots per cpu. */
#define THRESHOLD_ACTIVATE_SWAP_SLOTS_CACHE	(5*SWAP_SLOTS_CACHE_SIZE)
/* Bypass caching when free swap drops below this many slots per cpu. */
#define THRESHOLD_DEACTIVATE_SWAP_SLOTS_CACHE	(2*SWAP_SLOTS_CACHE_SIZE)

/* Per-cpu cache of swap slots, with independent alloc and free sides. */
struct swap_slots_cache {
	bool		lock_initialized;
	struct mutex	alloc_lock; /* protects slots, nr, cur */
	swp_entry_t	*slots;
	int		nr;
	int		cur;
	spinlock_t	free_lock;  /* protects slots_ret, n_ret */
	swp_entry_t	*slots_ret;
	int		n_ret;
};

void disable_swap_slots_cache_lock(void);
void reenable_swap_slots_cache_unlock(void);
int enable_swap_slots_cache(void);
int free_swap_slot(swp_entry_t entry);

extern bool swap_slot_cache_enabled;

#endif /* _LINUX_SWAP_SLOTS_H */

View File

@@ -5075,8 +5075,8 @@ void perf_event_update_userpage(struct perf_event *event)
userpg = rb->user_page;
/*
* Disable preemption so as to not let the corresponding user-space
* spin too long if we get preempted.
* Disable preemption to guarantee consistent time stamps are stored to
* the user page.
*/
preempt_disable();
++userpg->lock;

View File

@@ -20,7 +20,8 @@
static noinline size_t
lzo1x_1_do_compress(const unsigned char *in, size_t in_len,
unsigned char *out, size_t *out_len,
size_t ti, void *wrkmem)
size_t ti, void *wrkmem, signed char *state_offset,
const unsigned char bitstream_version)
{
const unsigned char *ip;
unsigned char *op;
@@ -35,27 +36,85 @@ lzo1x_1_do_compress(const unsigned char *in, size_t in_len,
ip += ti < 4 ? 4 - ti : 0;
for (;;) {
const unsigned char *m_pos;
const unsigned char *m_pos = NULL;
size_t t, m_len, m_off;
u32 dv;
u32 run_length = 0;
literal:
ip += 1 + ((ip - ii) >> 5);
next:
if (unlikely(ip >= ip_end))
break;
dv = get_unaligned_le32(ip);
t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK;
m_pos = in + dict[t];
dict[t] = (lzo_dict_t) (ip - in);
if (unlikely(dv != get_unaligned_le32(m_pos)))
goto literal;
if (dv == 0 && bitstream_version) {
const unsigned char *ir = ip + 4;
const unsigned char *limit = ip_end
< (ip + MAX_ZERO_RUN_LENGTH + 1)
? ip_end : ip + MAX_ZERO_RUN_LENGTH + 1;
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && \
defined(LZO_FAST_64BIT_MEMORY_ACCESS)
u64 dv64;
for (; (ir + 32) <= limit; ir += 32) {
dv64 = get_unaligned((u64 *)ir);
dv64 |= get_unaligned((u64 *)ir + 1);
dv64 |= get_unaligned((u64 *)ir + 2);
dv64 |= get_unaligned((u64 *)ir + 3);
if (dv64)
break;
}
for (; (ir + 8) <= limit; ir += 8) {
dv64 = get_unaligned((u64 *)ir);
if (dv64) {
# if defined(__LITTLE_ENDIAN)
ir += __builtin_ctzll(dv64) >> 3;
# elif defined(__BIG_ENDIAN)
ir += __builtin_clzll(dv64) >> 3;
# else
# error "missing endian definition"
# endif
break;
}
}
#else
while ((ir < (const unsigned char *)
ALIGN((uintptr_t)ir, 4)) &&
(ir < limit) && (*ir == 0))
ir++;
for (; (ir + 4) <= limit; ir += 4) {
dv = *((u32 *)ir);
if (dv) {
# if defined(__LITTLE_ENDIAN)
ir += __builtin_ctz(dv) >> 3;
# elif defined(__BIG_ENDIAN)
ir += __builtin_clz(dv) >> 3;
# else
# error "missing endian definition"
# endif
break;
}
}
#endif
while (likely(ir < limit) && unlikely(*ir == 0))
ir++;
run_length = ir - ip;
if (run_length > MAX_ZERO_RUN_LENGTH)
run_length = MAX_ZERO_RUN_LENGTH;
} else {
t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK;
m_pos = in + dict[t];
dict[t] = (lzo_dict_t) (ip - in);
if (unlikely(dv != get_unaligned_le32(m_pos)))
goto literal;
}
ii -= ti;
ti = 0;
t = ip - ii;
if (t != 0) {
if (t <= 3) {
op[-2] |= t;
op[*state_offset] |= t;
COPY4(op, ii);
op += t;
} else if (t <= 16) {
@@ -88,6 +147,17 @@ next:
}
}
if (unlikely(run_length)) {
ip += run_length;
run_length -= MIN_ZERO_RUN_LENGTH;
put_unaligned_le32((run_length << 21) | 0xfffc18
| (run_length & 0x7), op);
op += 4;
run_length = 0;
*state_offset = -3;
goto finished_writing_instruction;
}
m_len = 4;
{
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ64)
@@ -170,7 +240,6 @@ m_len_done:
m_off = ip - m_pos;
ip += m_len;
ii = ip;
if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) {
m_off -= 1;
*op++ = (((m_len - 1) << 5) | ((m_off & 7) << 2));
@@ -207,29 +276,48 @@ m_len_done:
*op++ = (m_off << 2);
*op++ = (m_off >> 6);
}
*state_offset = -2;
finished_writing_instruction:
ii = ip;
goto next;
}
*out_len = op - out;
return in_end - (ii - ti);
}
int lzo1x_1_compress(const unsigned char *in, size_t in_len,
int lzogeneric1x_1_compress(const unsigned char *in, size_t in_len,
unsigned char *out, size_t *out_len,
void *wrkmem)
void *wrkmem, const unsigned char bitstream_version)
{
const unsigned char *ip = in;
unsigned char *op = out;
unsigned char *data_start;
size_t l = in_len;
size_t t = 0;
signed char state_offset = -2;
unsigned int m4_max_offset;
// LZO v0 will never write 17 as first byte (except for zero-length
// input), so this is used to version the bitstream
if (bitstream_version > 0) {
*op++ = 17;
*op++ = bitstream_version;
m4_max_offset = M4_MAX_OFFSET_V1;
} else {
m4_max_offset = M4_MAX_OFFSET_V0;
}
data_start = op;
while (l > 20) {
size_t ll = l <= (M4_MAX_OFFSET + 1) ? l : (M4_MAX_OFFSET + 1);
size_t ll = l <= (m4_max_offset + 1) ? l : (m4_max_offset + 1);
uintptr_t ll_end = (uintptr_t) ip + ll;
if ((ll_end + ((t + ll) >> 5)) <= ll_end)
break;
BUILD_BUG_ON(D_SIZE * sizeof(lzo_dict_t) > LZO1X_1_MEM_COMPRESS);
memset(wrkmem, 0, D_SIZE * sizeof(lzo_dict_t));
t = lzo1x_1_do_compress(ip, ll, op, out_len, t, wrkmem);
t = lzo1x_1_do_compress(ip, ll, op, out_len, t, wrkmem,
&state_offset, bitstream_version);
ip += ll;
op += *out_len;
l -= ll;
@@ -239,10 +327,10 @@ int lzo1x_1_compress(const unsigned char *in, size_t in_len,
if (t > 0) {
const unsigned char *ii = in + in_len - t;
if (op == out && t <= 238) {
if (op == data_start && t <= 238) {
*op++ = (17 + t);
} else if (t <= 3) {
op[-2] |= t;
op[state_offset] |= t;
} else if (t <= 18) {
*op++ = (t - 3);
} else {
@@ -273,7 +361,24 @@ int lzo1x_1_compress(const unsigned char *in, size_t in_len,
*out_len = op - out;
return LZO_E_OK;
}
/* Classic LZO1X-1: bitstream version 0, no run-length extension. */
int lzo1x_1_compress(const unsigned char *in, size_t in_len,
		     unsigned char *out, size_t *out_len,
		     void *wrkmem)
{
	return lzogeneric1x_1_compress(in, in_len, out, out_len, wrkmem, 0);
}

/* LZO1X-1 with run-length encoding of zeros (bitstream LZO_VERSION). */
int lzorle1x_1_compress(const unsigned char *in, size_t in_len,
		     unsigned char *out, size_t *out_len,
		     void *wrkmem)
{
	return lzogeneric1x_1_compress(in, in_len, out, out_len,
				       wrkmem, LZO_VERSION);
}

EXPORT_SYMBOL_GPL(lzo1x_1_compress);
EXPORT_SYMBOL_GPL(lzorle1x_1_compress);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("LZO1X-1 Compressor");

View File

@@ -46,11 +46,21 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
const unsigned char * const ip_end = in + in_len;
unsigned char * const op_end = out + *out_len;
unsigned char bitstream_version;
op = out;
ip = in;
if (unlikely(in_len < 3))
goto input_overrun;
if (likely(in_len >= 5) && likely(*ip == 17)) {
bitstream_version = ip[1];
ip += 2;
} else {
bitstream_version = 0;
}
if (*ip > 17) {
t = *ip++ - 17;
if (t < 4) {
@@ -154,32 +164,49 @@ copy_literal_run:
m_pos -= next >> 2;
next &= 3;
} else {
m_pos = op;
m_pos -= (t & 8) << 11;
t = (t & 7) + (3 - 1);
if (unlikely(t == 2)) {
size_t offset;
const unsigned char *ip_last = ip;
while (unlikely(*ip == 0)) {
ip++;
NEED_IP(1);
}
offset = ip - ip_last;
if (unlikely(offset > MAX_255_COUNT))
return LZO_E_ERROR;
offset = (offset << 8) - offset;
t += offset + 7 + *ip++;
NEED_IP(2);
}
NEED_IP(2);
next = get_unaligned_le16(ip);
ip += 2;
m_pos -= next >> 2;
next &= 3;
if (m_pos == op)
goto eof_found;
m_pos -= 0x4000;
if (((next & 0xfffc) == 0xfffc) &&
((t & 0xf8) == 0x18) &&
likely(bitstream_version)) {
NEED_IP(3);
t &= 7;
t |= ip[2] << 3;
t += MIN_ZERO_RUN_LENGTH;
NEED_OP(t);
memset(op, 0, t);
op += t;
next &= 3;
ip += 3;
goto match_next;
} else {
m_pos = op;
m_pos -= (t & 8) << 11;
t = (t & 7) + (3 - 1);
if (unlikely(t == 2)) {
size_t offset;
const unsigned char *ip_last = ip;
while (unlikely(*ip == 0)) {
ip++;
NEED_IP(1);
}
offset = ip - ip_last;
if (unlikely(offset > MAX_255_COUNT))
return LZO_E_ERROR;
offset = (offset << 8) - offset;
t += offset + 7 + *ip++;
NEED_IP(2);
next = get_unaligned_le16(ip);
}
ip += 2;
m_pos -= next >> 2;
next &= 3;
if (m_pos == op)
goto eof_found;
m_pos -= 0x4000;
}
}
TEST_LB(m_pos);
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)

View File

@@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* lzodefs.h -- architecture, OS and compiler specific defines
*
@@ -12,9 +13,15 @@
*/
/* Version
* 0: original lzo version
* 1: lzo with support for RLE
*/
#define LZO_VERSION 1
#define COPY4(dst, src) \
put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst))
#if defined(__x86_64__)
#if defined(CONFIG_X86_64) || defined(CONFIG_ARM64)
#define COPY8(dst, src) \
put_unaligned(get_unaligned((const u64 *)(src)), (u64 *)(dst))
#else
@@ -24,19 +31,21 @@
#if defined(__BIG_ENDIAN) && defined(__LITTLE_ENDIAN)
#error "conflicting endian definitions"
#elif defined(__x86_64__)
#elif defined(CONFIG_X86_64) || defined(CONFIG_ARM64)
#define LZO_USE_CTZ64 1
#define LZO_USE_CTZ32 1
#elif defined(__i386__) || defined(__powerpc__)
#define LZO_FAST_64BIT_MEMORY_ACCESS
#elif defined(CONFIG_X86) || defined(CONFIG_PPC)
#define LZO_USE_CTZ32 1
#elif defined(__arm__) && (__LINUX_ARM_ARCH__ >= 5)
#elif defined(CONFIG_ARM) && (__LINUX_ARM_ARCH__ >= 5)
#define LZO_USE_CTZ32 1
#endif
#define M1_MAX_OFFSET 0x0400
#define M2_MAX_OFFSET 0x0800
#define M3_MAX_OFFSET 0x4000
#define M4_MAX_OFFSET 0xbfff
#define M4_MAX_OFFSET_V0 0xbfff
#define M4_MAX_OFFSET_V1 0xbffe
#define M1_MIN_LEN 2
#define M1_MAX_LEN 2
@@ -52,6 +61,9 @@
#define M3_MARKER 32
#define M4_MARKER 16
#define MIN_ZERO_RUN_LENGTH 4
#define MAX_ZERO_RUN_LENGTH (2047 + MIN_ZERO_RUN_LENGTH)
#define lzo_dict_t unsigned short
#define D_BITS 13
#define D_SIZE (1u << D_BITS)

View File

@@ -38,7 +38,7 @@ obj-y := filemap.o mempool.o oom_kill.o \
readahead.o swap.o truncate.o vmscan.o shmem.o \
util.o mmzone.o vmstat.o backing-dev.o \
mm_init.o mmu_context.o percpu.o slab_common.o \
compaction.o vmacache.o \
compaction.o vmacache.o swap_slots.o \
interval_tree.o list_lru.o workingset.o \
debug.o $(mmu-y) showmem.o vmpressure.o

View File

@@ -810,7 +810,10 @@ EXPORT_SYMBOL(release_pages);
*/
/*
 * Release the pages in @pvec and reinitialise it.  The LRU drain is
 * performed at most once per pagevec cycle, tracked by pvec->drained,
 * to avoid repeated drains when a pagevec is released in batches.
 * NOTE(review): assumes pagevec_reinit() resets ->drained so the pagevec
 * can be reused -- confirm against its definition.
 */
void __pagevec_release(struct pagevec *pvec)
{
	if (!pvec->drained) {
		lru_add_drain();
		pvec->drained = true;
	}
	release_pages(pvec->pages, pagevec_count(pvec), pvec->cold);
	pagevec_reinit(pvec);
}
@@ -981,13 +984,6 @@ EXPORT_SYMBOL(pagevec_lookup_range_nr_tag);
*/
void __init swap_setup(void)
{
#ifdef CONFIG_SWAP
int i;
for (i = 0; i < MAX_SWAPFILES; i++)
spin_lock_init(&swapper_spaces[i].tree_lock);
#endif
/*
* Right now other parts of the system means that we
* _really_ don't want to cluster much more

View File

@@ -17,8 +17,9 @@
#define SWAP_RATIO_GROUP_START (SWAP_FLAG_PRIO_MASK - 9) /* 32758 */
#define SWAP_RATIO_GROUP_END (SWAP_FLAG_PRIO_MASK) /* 32767 */
#define SWAP_FAST_WRITES (SWAPFILE_CLUSTER * (SWAP_CLUSTER_MAX / 8))
#define SWAP_SLOW_WRITES SWAPFILE_CLUSTER
#define SWAP_FAST_WRITES \
((SWAPFILE_CLUSTER * (SWAP_CLUSTER_MAX / 8)) / SWAP_BATCH)
#define SWAP_SLOW_WRITES (SWAPFILE_CLUSTER / SWAP_BATCH)
/*
* The fast/slow swap write ratio.

345
mm/swap_slots.c Normal file
View File

@@ -0,0 +1,345 @@
/*
* Manage cache of swap slots to be used for and returned from
* swap.
*
* Copyright(c) 2016 Intel Corporation.
*
* Author: Tim Chen <tim.c.chen@linux.intel.com>
*
* We allocate the swap slots from the global pool and put
* it into local per cpu caches. This has the advantage
* of no needing to acquire the swap_info lock every time
* we need a new slot.
*
* There is also opportunity to simply return the slot
* to local caches without needing to acquire swap_info
* lock. We do not reuse the returned slots directly but
* move them back to the global pool in a batch. This
* allows the slots to coaellesce and reduce fragmentation.
*
* The swap entry allocated is marked with SWAP_HAS_CACHE
* flag in map_count that prevents it from being allocated
* again from the global pool.
*
* The swap slots cache is protected by a mutex instead of
* a spin lock as when we search for slots with scan_swap_map,
* we can possibly sleep.
*/
#include <linux/swap_slots.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/vmalloc.h>
#include <linux/mutex.h>
#include <linux/mm.h>
#ifdef CONFIG_SWAP

/* Per-cpu swap slot caches. */
static DEFINE_PER_CPU(struct swap_slots_cache, swp_slots);

/* True while the caches are actively handing out slots (threshold-driven). */
static bool	swap_slot_cache_active;
/* Cleared while swapon/swapoff are in progress; see disable/reenable. */
bool	swap_slot_cache_enabled;
/* Set once the cpu hotplug callbacks have been installed. */
static bool	swap_slot_cache_initialized;
DEFINE_MUTEX(swap_slots_cache_mutex);
/* Serialize swap slots cache enable/disable operations */
DEFINE_MUTEX(swap_slots_cache_enable_mutex);

static void __drain_swap_slots_cache(unsigned int type);
static void deactivate_swap_slots_cache(void);
static void reactivate_swap_slots_cache(void);

/* All three conditions must hold for the cache fast path to be used. */
#define use_swap_slot_cache (swap_slot_cache_active && \
		swap_slot_cache_enabled && swap_slot_cache_initialized)
/* Flags for __drain_swap_slots_cache(): alloc side / free (return) side. */
#define SLOTS_CACHE 0x1
#define SLOTS_CACHE_RET 0x2
/* Stop using the caches and return all cached slots to the global pool. */
static void deactivate_swap_slots_cache(void)
{
	mutex_lock(&swap_slots_cache_mutex);
	swap_slot_cache_active = false;
	__drain_swap_slots_cache(SLOTS_CACHE|SLOTS_CACHE_RET);
	mutex_unlock(&swap_slots_cache_mutex);
}

/* Resume filling and consuming the per-cpu caches. */
static void reactivate_swap_slots_cache(void)
{
	mutex_lock(&swap_slots_cache_mutex);
	swap_slot_cache_active = true;
	mutex_unlock(&swap_slots_cache_mutex);
}
/*
 * Disable the slot caches and drain them, leaving
 * swap_slots_cache_enable_mutex held; the matching
 * reenable_swap_slots_cache_unlock() drops it.
 * Must not be called with cpu hot plug lock.
 */
void disable_swap_slots_cache_lock(void)
{
	mutex_lock(&swap_slots_cache_enable_mutex);
	swap_slot_cache_enabled = false;
	if (swap_slot_cache_initialized) {
		/* serialize with cpu hotplug operations */
		get_online_cpus();
		__drain_swap_slots_cache(SLOTS_CACHE|SLOTS_CACHE_RET);
		put_online_cpus();
	}
}
/* Re-enable the cache iff there is still usable swap configured. */
static void __reenable_swap_slots_cache(void)
{
	swap_slot_cache_enabled = has_usable_swap();
}

/* Counterpart of disable_swap_slots_cache_lock(): re-enable, then unlock. */
void reenable_swap_slots_cache_unlock(void)
{
	__reenable_swap_slots_cache();
	mutex_unlock(&swap_slots_cache_enable_mutex);
}
/*
 * Decide whether the per-cpu slot caches should be used right now,
 * activating or deactivating them based on how much free swap remains.
 * Returns the resulting activation state.
 */
static bool check_cache_active(void)
{
	long free_pages;

	if (!swap_slot_cache_enabled || !swap_slot_cache_initialized)
		return false;

	free_pages = get_nr_swap_pages();
	if (swap_slot_cache_active) {
		/* if global pool of slot caches too low, deactivate cache */
		if (free_pages < num_online_cpus() *
				THRESHOLD_DEACTIVATE_SWAP_SLOTS_CACHE)
			deactivate_swap_slots_cache();
	} else if (free_pages > num_online_cpus() *
				THRESHOLD_ACTIVATE_SWAP_SLOTS_CACHE) {
		reactivate_swap_slots_cache();
	}

	return swap_slot_cache_active;
}
/*
 * CPU hotplug "online" callback: allocate and install the per-cpu slot
 * cache for @cpu.  A cache that already exists is left untouched.
 * Returns 0 on success or if already allocated, -ENOMEM on failure.
 */
static int alloc_swap_slot_cache(unsigned int cpu)
{
	struct swap_slots_cache *cache;
	swp_entry_t *slots, *slots_ret;

	/*
	 * Do allocation outside swap_slots_cache_mutex
	 * as kvzalloc could trigger reclaim and get_swap_page,
	 * which can lock swap_slots_cache_mutex.
	 */
	slots = kvzalloc(sizeof(swp_entry_t) * SWAP_SLOTS_CACHE_SIZE,
			 GFP_KERNEL);
	if (!slots)
		return -ENOMEM;

	slots_ret = kvzalloc(sizeof(swp_entry_t) * SWAP_SLOTS_CACHE_SIZE,
			     GFP_KERNEL);
	if (!slots_ret) {
		kvfree(slots);
		return -ENOMEM;
	}

	mutex_lock(&swap_slots_cache_mutex);
	cache = &per_cpu(swp_slots, cpu);
	if (cache->slots || cache->slots_ret)
		/* cache already allocated */
		goto out;
	if (!cache->lock_initialized) {
		mutex_init(&cache->alloc_lock);
		spin_lock_init(&cache->free_lock);
		cache->lock_initialized = true;
	}
	cache->nr = 0;
	cache->cur = 0;
	cache->n_ret = 0;
	/* ownership of the buffers passes to the cache */
	cache->slots = slots;
	slots = NULL;
	cache->slots_ret = slots_ret;
	slots_ret = NULL;
out:
	mutex_unlock(&swap_slots_cache_mutex);
	/* kvfree(NULL) is a no-op, so unused buffers need no guard */
	kvfree(slots);
	kvfree(slots_ret);

	return 0;
}
/*
 * Return the slots cached on @cpu back to the global pool, and optionally
 * (@free_slots) free the cache arrays themselves.  @type selects the
 * allocation side (SLOTS_CACHE), the free side (SLOTS_CACHE_RET), or both.
 */
static void drain_slots_cache_cpu(unsigned int cpu, unsigned int type,
				  bool free_slots)
{
	struct swap_slots_cache *cache;
	swp_entry_t *slots = NULL;

	cache = &per_cpu(swp_slots, cpu);
	if ((type & SLOTS_CACHE) && cache->slots) {
		mutex_lock(&cache->alloc_lock);
		/* only the not-yet-consumed tail [cur, cur+nr) holds slots */
		swapcache_free_entries(cache->slots + cache->cur, cache->nr);
		cache->cur = 0;
		cache->nr = 0;
		if (free_slots && cache->slots) {
			kvfree(cache->slots);
			cache->slots = NULL;
		}
		mutex_unlock(&cache->alloc_lock);
	}
	if ((type & SLOTS_CACHE_RET) && cache->slots_ret) {
		spin_lock_irq(&cache->free_lock);
		swapcache_free_entries(cache->slots_ret, cache->n_ret);
		cache->n_ret = 0;
		if (free_slots && cache->slots_ret) {
			/* defer kvfree until the spinlock is dropped */
			slots = cache->slots_ret;
			cache->slots_ret = NULL;
		}
		spin_unlock_irq(&cache->free_lock);
		if (slots)
			kvfree(slots);
	}
}
/* Drain the cache sides selected by @type on every online cpu. */
static void __drain_swap_slots_cache(unsigned int type)
{
	unsigned int cpu;

	/*
	 * This function is called during
	 *	1) swapoff, when we have to make sure no
	 *	   left over slots are in cache when we remove
	 *	   a swap device;
	 *	2) disabling of swap slot cache, when we run low
	 *	   on swap slots when allocating memory and need
	 *	   to return swap slots to global pool.
	 *
	 * We cannot acquire cpu hot plug lock here as
	 * this function can be invoked in the cpu
	 * hot plug path:
	 * cpu_up -> lock cpu_hotplug -> cpu hotplug state callback
	 *	 -> memory allocation -> direct reclaim -> get_swap_page
	 *	 -> drain_swap_slots_cache
	 *
	 * Hence the loop over current online cpu below could miss cpu that
	 * is being brought online but not yet marked as online.
	 * That is okay as we do not schedule and run anything on a
	 * cpu before it has been marked online. Hence, we will not
	 * fill any swap slots in slots cache of such cpu.
	 * There are no slots on such cpu that need to be drained.
	 */
	for_each_online_cpu(cpu)
		drain_slots_cache_cpu(cpu, type, false);
}
/* CPU hotplug "offline" callback: drain and free @cpu's slot cache. */
static int free_slot_cache(unsigned int cpu)
{
	mutex_lock(&swap_slots_cache_mutex);
	drain_slots_cache_cpu(cpu, SLOTS_CACHE | SLOTS_CACHE_RET, true);
	mutex_unlock(&swap_slots_cache_mutex);
	return 0;
}
/*
 * Enable the swap slot caches, installing the cpu hotplug callbacks on
 * first use.  Returns 0 on success, or the negative error from
 * cpuhp_setup_state() on failure (previously the error was silently
 * swallowed and 0 was always returned).
 */
int enable_swap_slots_cache(void)
{
	int ret = 0;

	mutex_lock(&swap_slots_cache_enable_mutex);
	if (swap_slot_cache_initialized) {
		__reenable_swap_slots_cache();
		goto out_unlock;
	}

	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "swap_slots_cache",
				alloc_swap_slot_cache, free_slot_cache);
	if (ret < 0)
		goto out_unlock;
	/*
	 * cpuhp_setup_state(CPUHP_AP_ONLINE_DYN) returns the dynamically
	 * allocated state number (> 0) on success; normalize to 0.
	 */
	ret = 0;

	swap_slot_cache_initialized = true;
	__reenable_swap_slots_cache();
out_unlock:
	mutex_unlock(&swap_slots_cache_enable_mutex);
	return ret;
}
/*
 * Refill the alloc side of @cache from the global pool.
 * Returns the number of slots now cached (0 if caching is off).
 * Called with swap slot cache's alloc lock held.
 */
static int refill_swap_slots_cache(struct swap_slots_cache *cache)
{
	/* nothing to do if caching is off or slots remain unconsumed */
	if (!use_swap_slot_cache || cache->nr)
		return 0;

	cache->cur = 0;
	if (swap_slot_cache_active)
		cache->nr = get_swap_pages(SWAP_SLOTS_CACHE_SIZE, cache->slots);

	return cache->nr;
}
/*
 * Hand a no-longer-used swap entry back, preferably via this CPU's
 * slots_ret cache; falls back to freeing it directly into the global
 * pool when the cache is disabled.  Always returns 0.
 */
int free_swap_slot(swp_entry_t entry)
{
	struct swap_slots_cache *cache;
	BUG_ON(!swap_slot_cache_initialized);
	cache = &get_cpu_var(swp_slots);
	if (use_swap_slot_cache && cache->slots_ret) {
		spin_lock_irq(&cache->free_lock);
		/* Swap slots cache may be deactivated before acquiring lock */
		if (!use_swap_slot_cache) {
			spin_unlock_irq(&cache->free_lock);
			/* note: jumps into the else-branch below */
			goto direct_free;
		}
		if (cache->n_ret >= SWAP_SLOTS_CACHE_SIZE) {
			/*
			 * Return slots to global pool.
			 * The current swap_map value is SWAP_HAS_CACHE.
			 * Set it to 0 to indicate it is available for
			 * allocation in global pool
			 */
			swapcache_free_entries(cache->slots_ret, cache->n_ret);
			cache->n_ret = 0;
		}
		/* Stash this entry in the (now non-full) return cache. */
		cache->slots_ret[cache->n_ret++] = entry;
		spin_unlock_irq(&cache->free_lock);
	} else {
direct_free:
		/* Cache unavailable: free the single entry immediately. */
		swapcache_free_entries(&entry, 1);
	}
	put_cpu_var(swp_slots);
	return 0;
}
/*
 * Allocate one swap entry, preferring this CPU's slots cache and
 * falling back to the global allocator.  Returns an entry with
 * val == 0 if no swap space is available.
 */
swp_entry_t get_swap_page(void)
{
	swp_entry_t entry, *pentry;
	struct swap_slots_cache *cache;
	/*
	 * Preemption is allowed here, because we may sleep
	 * in refill_swap_slots_cache(). But it is safe, because
	 * accesses to the per-CPU data structure are protected by the
	 * mutex cache->alloc_lock.
	 *
	 * The alloc path here does not touch cache->slots_ret
	 * so cache->free_lock is not taken.
	 */
	cache = raw_cpu_ptr(&swp_slots);
	entry.val = 0;
	if (check_cache_active()) {
		mutex_lock(&cache->alloc_lock);
		if (cache->slots) {
repeat:
			if (cache->nr) {
				/* Pop the next cached slot and clear it. */
				pentry = &cache->slots[cache->cur++];
				entry = *pentry;
				pentry->val = 0;
				cache->nr--;
			} else {
				/* Cache empty: try one refill, then retry. */
				if (refill_swap_slots_cache(cache))
					goto repeat;
			}
		}
		mutex_unlock(&cache->alloc_lock);
		if (entry.val)
			return entry;
	}
	/* Cache inactive or exhausted: allocate directly from the pool. */
	get_swap_pages(1, &entry);
	return entry;
}
#endif /* CONFIG_SWAP */

View File

@@ -17,6 +17,8 @@
#include <linux/blkdev.h>
#include <linux/pagevec.h>
#include <linux/migrate.h>
#include <linux/vmalloc.h>
#include <linux/swap_slots.h>
#include <asm/pgtable.h>
#include "internal.h"
@@ -33,15 +35,8 @@ static const struct address_space_operations swap_aops = {
#endif
};
struct address_space swapper_spaces[MAX_SWAPFILES] = {
[0 ... MAX_SWAPFILES - 1] = {
.page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
.i_mmap_writable = ATOMIC_INIT(0),
.a_ops = &swap_aops,
/* swap cache doesn't use writeback related tags */
.flags = 1 << AS_NO_WRITEBACK_TAGS,
}
};
struct address_space *swapper_spaces[MAX_SWAPFILES];
static unsigned int nr_swapper_spaces[MAX_SWAPFILES];
#define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0)
@@ -54,11 +49,26 @@ static struct {
unsigned long total_swapcache_pages(void)
{
int i;
unsigned int i, j, nr;
unsigned long ret = 0;
struct address_space *spaces;
for (i = 0; i < MAX_SWAPFILES; i++)
ret += swapper_spaces[i].nrpages;
rcu_read_lock();
for (i = 0; i < MAX_SWAPFILES; i++) {
/*
* The corresponding entries in nr_swapper_spaces and
* swapper_spaces will be reused only after at least
		 * one grace period.  So it is impossible for them
		 * to belong to different usages.
*/
nr = nr_swapper_spaces[i];
spaces = rcu_dereference(swapper_spaces[i]);
if (!nr || !spaces)
continue;
for (j = 0; j < nr; j++)
ret += spaces[j].nrpages;
}
rcu_read_unlock();
return ret;
}
@@ -315,6 +325,17 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
if (found_page)
break;
/*
* Just skip read ahead for unused swap slot.
* During swap_off when swap_slot_cache is disabled,
* we have to handle the race between putting
* swap entry in swap cache and marking swap slot
* as SWAP_HAS_CACHE. That's done in later part of code or
* else swap_off will be aborted if we return NULL.
*/
if (!__swp_swapcount(entry) && swap_slot_cache_enabled)
break;
/*
* Get a new page to read into from swap.
*/
@@ -507,3 +528,38 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
skip:
return read_swap_cache_async(entry, gfp_mask, vma, addr);
}
/*
 * Allocate and publish the swap-cache address spaces for swap device
 * @type: one address_space per SWAP_ADDRESS_SPACE_PAGES chunk of the
 * device, so swap-cache radix tree locking is split per chunk.
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
int init_swap_address_space(unsigned int type, unsigned long nr_pages)
{
	struct address_space *spaces;
	unsigned int nr, i;

	nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES);
	spaces = kvzalloc(sizeof(struct address_space) * nr, GFP_KERNEL);
	if (!spaces)
		return -ENOMEM;

	for (i = 0; i < nr; i++) {
		struct address_space *space = &spaces[i];

		INIT_RADIX_TREE(&space->page_tree, GFP_ATOMIC|__GFP_NOWARN);
		atomic_set(&space->i_mmap_writable, 0);
		space->a_ops = &swap_aops;
		/* swap cache doesn't use writeback related tags */
		mapping_set_no_writeback_tags(space);
		spin_lock_init(&space->tree_lock);
	}

	/* Publish the count, then the pointer for RCU readers. */
	nr_swapper_spaces[type] = nr;
	rcu_assign_pointer(swapper_spaces[type], spaces);
	return 0;
}
/*
 * Tear down the swap-cache address spaces of swap device @type:
 * unpublish them, wait one RCU grace period so concurrent readers
 * (e.g. total_swapcache_pages()) drain, then free the array.
 */
void exit_swap_address_space(unsigned int type)
{
	struct address_space *spaces = swapper_spaces[type];

	nr_swapper_spaces[type] = 0;
	rcu_assign_pointer(swapper_spaces[type], NULL);
	synchronize_rcu();
	kvfree(spaces);
}

View File

@@ -34,6 +34,7 @@
#include <linux/frontswap.h>
#include <linux/swapfile.h>
#include <linux/export.h>
#include <linux/swap_slots.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
@@ -276,6 +277,47 @@ static inline void cluster_set_null(struct swap_cluster_info *info)
info->data = 0;
}
static inline struct swap_cluster_info *lock_cluster(struct swap_info_struct *si,
unsigned long offset)
{
struct swap_cluster_info *ci;
ci = si->cluster_info;
if (ci) {
ci += offset / SWAPFILE_CLUSTER;
spin_lock(&ci->lock);
}
return ci;
}
/* Release a cluster lock taken by lock_cluster(); NULL is a no-op. */
static inline void unlock_cluster(struct swap_cluster_info *ci)
{
	if (!ci)
		return;
	spin_unlock(&ci->lock);
}
/*
 * Take the fine-grained cluster lock for @offset when the device has
 * cluster_info; otherwise fall back to the whole-device si->lock.
 * Returns the locked cluster, or NULL when si->lock was taken instead
 * — pass the result to unlock_cluster_or_swap_info().
 */
static inline struct swap_cluster_info *lock_cluster_or_swap_info(
	struct swap_info_struct *si,
	unsigned long offset)
{
	struct swap_cluster_info *ci = lock_cluster(si, offset);

	if (!ci)
		spin_lock(&si->lock);
	return ci;
}
/* Counterpart of lock_cluster_or_swap_info(): release whichever lock it took. */
static inline void unlock_cluster_or_swap_info(struct swap_info_struct *si,
					       struct swap_cluster_info *ci)
{
	if (!ci) {
		spin_unlock(&si->lock);
		return;
	}
	unlock_cluster(ci);
}
static inline bool cluster_list_empty(struct swap_cluster_list *list)
{
return cluster_is_null(&list->head);
@@ -300,9 +342,17 @@ static void cluster_list_add_tail(struct swap_cluster_list *list,
cluster_set_next_flag(&list->head, idx, 0);
cluster_set_next_flag(&list->tail, idx, 0);
} else {
struct swap_cluster_info *ci_tail;
unsigned int tail = cluster_next(&list->tail);
cluster_set_next(&ci[tail], idx);
/*
* Nested cluster lock, but both cluster locks are
* only acquired when we held swap_info_struct->lock
*/
ci_tail = ci + tail;
spin_lock_nested(&ci_tail->lock, SINGLE_DEPTH_NESTING);
cluster_set_next(ci_tail, idx);
unlock_cluster(ci_tail);
cluster_set_next_flag(&list->tail, idx, 0);
}
}
@@ -347,7 +397,7 @@ static void swap_cluster_schedule_discard(struct swap_info_struct *si,
*/
static void swap_do_scheduled_discard(struct swap_info_struct *si)
{
struct swap_cluster_info *info;
struct swap_cluster_info *info, *ci;
unsigned int idx;
info = si->cluster_info;
@@ -360,10 +410,14 @@ static void swap_do_scheduled_discard(struct swap_info_struct *si)
SWAPFILE_CLUSTER);
spin_lock(&si->lock);
cluster_set_flag(&info[idx], CLUSTER_FLAG_FREE);
ci = lock_cluster(si, idx * SWAPFILE_CLUSTER);
cluster_set_flag(ci, CLUSTER_FLAG_FREE);
unlock_cluster(ci);
cluster_list_add_tail(&si->free_clusters, info, idx);
ci = lock_cluster(si, idx * SWAPFILE_CLUSTER);
memset(si->swap_map + idx * SWAPFILE_CLUSTER,
0, SWAPFILE_CLUSTER);
unlock_cluster(ci);
}
}
@@ -462,12 +516,13 @@ scan_swap_map_ssd_cluster_conflict(struct swap_info_struct *si,
* Try to get a swap entry from current cpu's swap entry pool (a cluster). This
* might involve allocating a new cluster for current CPU too.
*/
static void scan_swap_map_try_ssd_cluster(struct swap_info_struct *si,
static bool scan_swap_map_try_ssd_cluster(struct swap_info_struct *si,
unsigned long *offset, unsigned long *scan_base)
{
struct percpu_cluster *cluster;
struct swap_cluster_info *ci;
bool found_free;
unsigned long tmp;
unsigned long tmp, max;
new_cluster:
cluster = this_cpu_ptr(si->percpu_cluster);
@@ -485,7 +540,7 @@ new_cluster:
*scan_base = *offset = si->cluster_next;
goto new_cluster;
} else
return;
return false;
}
found_free = false;
@@ -495,14 +550,21 @@ new_cluster:
* check if there is still free entry in the cluster
*/
tmp = cluster->next;
while (tmp < si->max && tmp < (cluster_next(&cluster->index) + 1) *
SWAPFILE_CLUSTER) {
max = min_t(unsigned long, si->max,
(cluster_next(&cluster->index) + 1) * SWAPFILE_CLUSTER);
if (tmp >= max) {
cluster_set_null(&cluster->index);
goto new_cluster;
}
ci = lock_cluster(si, tmp);
while (tmp < max) {
if (!si->swap_map[tmp]) {
found_free = true;
break;
}
tmp++;
}
unlock_cluster(ci);
if (!found_free) {
cluster_set_null(&cluster->index);
goto new_cluster;
@@ -510,15 +572,22 @@ new_cluster:
cluster->next = tmp + 1;
*offset = tmp;
*scan_base = tmp;
return found_free;
}
static unsigned long scan_swap_map(struct swap_info_struct *si,
unsigned char usage)
static int scan_swap_map_slots(struct swap_info_struct *si,
unsigned char usage, int nr,
swp_entry_t slots[])
{
struct swap_cluster_info *ci;
unsigned long offset;
unsigned long scan_base;
unsigned long last_in_cluster = 0;
int latency_ration = LATENCY_LIMIT;
int n_ret = 0;
if (nr > SWAP_BATCH)
nr = SWAP_BATCH;
/*
* We try to cluster swap pages by allocating them sequentially
@@ -536,8 +605,10 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
/* SSD algorithm */
if (si->cluster_info) {
scan_swap_map_try_ssd_cluster(si, &offset, &scan_base);
goto checks;
if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base))
goto checks;
else
goto scan;
}
if (unlikely(!si->cluster_nr--)) {
@@ -581,8 +652,14 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
checks:
if (si->cluster_info) {
while (scan_swap_map_ssd_cluster_conflict(si, offset))
scan_swap_map_try_ssd_cluster(si, &offset, &scan_base);
while (scan_swap_map_ssd_cluster_conflict(si, offset)) {
/* take a break if we already got some slots */
if (n_ret)
goto done;
if (!scan_swap_map_try_ssd_cluster(si, &offset,
&scan_base))
goto scan;
}
}
if (!(si->flags & SWP_WRITEOK))
goto no_page;
@@ -591,9 +668,11 @@ checks:
if (offset > si->highest_bit)
scan_base = offset = si->lowest_bit;
ci = lock_cluster(si, offset);
/* reuse swap entry of cache-only swap if not busy. */
if (vm_swap_full(si) && si->swap_map[offset] == SWAP_HAS_CACHE) {
int swap_was_freed;
unlock_cluster(ci);
spin_unlock(&si->lock);
swap_was_freed = __try_to_reclaim_swap(si, offset);
spin_lock(&si->lock);
@@ -603,8 +682,16 @@ checks:
goto scan; /* check next one */
}
if (si->swap_map[offset])
goto scan;
if (si->swap_map[offset]) {
unlock_cluster(ci);
if (!n_ret)
goto scan;
else
goto done;
}
si->swap_map[offset] = usage;
inc_cluster_info_page(si, si->cluster_info, offset);
unlock_cluster(ci);
if (offset == si->lowest_bit)
si->lowest_bit++;
@@ -618,12 +705,44 @@ checks:
plist_del(&si->avail_list, &swap_avail_head);
spin_unlock(&swap_avail_lock);
}
si->swap_map[offset] = usage;
inc_cluster_info_page(si, si->cluster_info, offset);
si->cluster_next = offset + 1;
si->flags -= SWP_SCANNING;
slots[n_ret++] = swp_entry(si->type, offset);
return offset;
/* got enough slots or reach max slots? */
if ((n_ret == nr) || (offset >= si->highest_bit))
goto done;
/* search for next available slot */
/* time to take a break? */
if (unlikely(--latency_ration < 0)) {
if (n_ret)
goto done;
spin_unlock(&si->lock);
cond_resched();
spin_lock(&si->lock);
latency_ration = LATENCY_LIMIT;
}
/* try to get more slots in cluster */
if (si->cluster_info) {
if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base))
goto checks;
else
goto done;
}
/* non-ssd case */
++offset;
/* non-ssd case, still more slots in cluster? */
if (si->cluster_nr && !si->swap_map[offset]) {
--si->cluster_nr;
goto checks;
}
done:
si->flags -= SWP_SCANNING;
return n_ret;
scan:
spin_unlock(&si->lock);
@@ -663,18 +782,42 @@ scan:
no_page:
si->flags -= SWP_SCANNING;
return 0;
return n_ret;
}
swp_entry_t get_swap_page(void)
/*
 * Legacy single-slot wrapper around the batched allocator.
 * Returns the allocated offset, or 0 when no slot could be found.
 */
static unsigned long scan_swap_map(struct swap_info_struct *si,
				   unsigned char usage)
{
	swp_entry_t entry;

	if (!scan_swap_map_slots(si, usage, 1, &entry))
		return 0;
	return swp_offset(entry);
}
int get_swap_pages(int n_goal, swp_entry_t swp_entries[])
{
struct swap_info_struct *si, *next;
pgoff_t offset;
long avail_pgs;
int n_ret = 0;
int swap_ratio_off = 0;
if (atomic_long_read(&nr_swap_pages) <= 0)
avail_pgs = atomic_long_read(&nr_swap_pages);
if (avail_pgs <= 0)
goto noswap;
atomic_long_dec(&nr_swap_pages);
if (n_goal > SWAP_BATCH)
n_goal = SWAP_BATCH;
if (n_goal > avail_pgs)
n_goal = avail_pgs;
atomic_long_sub(n_goal, &nr_swap_pages);
lock_and_start:
spin_lock(&swap_avail_lock);
@@ -720,14 +863,14 @@ start:
spin_unlock(&si->lock);
goto nextsi;
}
/* This is called for allocating swap entry for cache */
offset = scan_swap_map(si, SWAP_HAS_CACHE);
n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE,
n_goal, swp_entries);
spin_unlock(&si->lock);
if (offset)
return swp_entry(si->type, offset);
if (n_ret)
goto check_out;
pr_debug("scan_swap_map of si %d failed to find offset\n",
si->type);
si->type);
spin_lock(&swap_avail_lock);
nextsi:
/*
@@ -738,7 +881,8 @@ nextsi:
* up between us dropping swap_avail_lock and taking si->lock.
* Since we dropped the swap_avail_lock, the swap_avail_head
* list may have been modified; so if next is still in the
* swap_avail_head list then try it, otherwise start over.
* swap_avail_head list then try it, otherwise start over
* if we have not gotten any slots.
*/
if (plist_node_empty(&next->avail_list))
goto start_over;
@@ -746,9 +890,11 @@ nextsi:
spin_unlock(&swap_avail_lock);
atomic_long_inc(&nr_swap_pages);
check_out:
if (n_ret < n_goal)
atomic_long_add((long) (n_goal-n_ret), &nr_swap_pages);
noswap:
return (swp_entry_t) {0};
return n_ret;
}
/* The only caller of this function is now suspend routine */
@@ -773,7 +919,7 @@ swp_entry_t get_swap_page_of_type(int type)
return (swp_entry_t) {0};
}
static struct swap_info_struct *swap_info_get(swp_entry_t entry)
static struct swap_info_struct *__swap_info_get(swp_entry_t entry)
{
struct swap_info_struct *p;
unsigned long offset, type;
@@ -789,34 +935,76 @@ static struct swap_info_struct *swap_info_get(swp_entry_t entry)
offset = swp_offset(entry);
if (offset >= p->max)
goto bad_offset;
if (!p->swap_map[offset])
goto bad_free;
spin_lock(&p->lock);
return p;
bad_free:
pr_err("swap_free: %s%08lx\n", Unused_offset, entry.val);
goto out;
bad_offset:
pr_err("swap_free: %s%08lx\n", Bad_offset, entry.val);
pr_err("swap_info_get: %s%08lx\n", Bad_offset, entry.val);
goto out;
bad_device:
pr_err("swap_free: %s%08lx\n", Unused_file, entry.val);
pr_err("swap_info_get: %s%08lx\n", Unused_file, entry.val);
goto out;
bad_nofile:
pr_err("swap_free: %s%08lx\n", Bad_file, entry.val);
pr_err("swap_info_get: %s%08lx\n", Bad_file, entry.val);
out:
return NULL;
}
static unsigned char swap_entry_free(struct swap_info_struct *p,
swp_entry_t entry, unsigned char usage)
/*
 * Like __swap_info_get(), but additionally rejects entries whose
 * swap_map count is zero (i.e. a stale/unused entry), logging the
 * offending value.  Returns NULL on any failure; takes no locks.
 */
static struct swap_info_struct *_swap_info_get(swp_entry_t entry)
{
	struct swap_info_struct *p = __swap_info_get(entry);

	if (p && !p->swap_map[swp_offset(entry)]) {
		pr_err("swap_info_get: %s%08lx\n", Unused_offset, entry.val);
		return NULL;
	}
	return p;
}
/*
 * Validate @entry and, on success, return its swap_info_struct with
 * p->lock held.  Returns NULL (no lock held) for bad entries.
 */
static struct swap_info_struct *swap_info_get(swp_entry_t entry)
{
	struct swap_info_struct *p = _swap_info_get(entry);

	if (p)
		spin_lock(&p->lock);
	return p;
}
/*
 * Batched variant of swap_info_get(): @q is the (locked) device from
 * the previous iteration, or NULL.  If @entry lives on the same device
 * the lock is simply kept; otherwise @q is unlocked and the new
 * device's lock is taken.  Returns the (locked) device for @entry,
 * or NULL for a bad entry (with @q unlocked).
 */
static struct swap_info_struct *swap_info_get_cont(swp_entry_t entry,
						   struct swap_info_struct *q)
{
	struct swap_info_struct *p = _swap_info_get(entry);

	if (p == q)
		return p;

	if (q)
		spin_unlock(&q->lock);
	if (p)
		spin_lock(&p->lock);
	return p;
}
static unsigned char __swap_entry_free(struct swap_info_struct *p,
swp_entry_t entry, unsigned char usage)
{
struct swap_cluster_info *ci;
unsigned long offset = swp_offset(entry);
unsigned char count;
unsigned char has_cache;
ci = lock_cluster_or_swap_info(p, offset);
count = p->swap_map[offset];
has_cache = count & SWAP_HAS_CACHE;
count &= ~SWAP_HAS_CACHE;
@@ -840,40 +1028,54 @@ static unsigned char swap_entry_free(struct swap_info_struct *p,
}
usage = count | has_cache;
p->swap_map[offset] = usage;
p->swap_map[offset] = usage ? : SWAP_HAS_CACHE;
/* free if no reference */
if (!usage) {
mem_cgroup_uncharge_swap(entry);
dec_cluster_info_page(p, p->cluster_info, offset);
if (offset < p->lowest_bit)
p->lowest_bit = offset;
if (offset > p->highest_bit) {
bool was_full = !p->highest_bit;
p->highest_bit = offset;
if (was_full && (p->flags & SWP_WRITEOK)) {
spin_lock(&swap_avail_lock);
WARN_ON(!plist_node_empty(&p->avail_list));
if (plist_node_empty(&p->avail_list))
plist_add(&p->avail_list,
&swap_avail_head);
spin_unlock(&swap_avail_lock);
}
}
atomic_long_inc(&nr_swap_pages);
p->inuse_pages--;
frontswap_invalidate_page(p->type, offset);
if (p->flags & SWP_BLKDEV) {
struct gendisk *disk = p->bdev->bd_disk;
if (disk->fops->swap_slot_free_notify)
disk->fops->swap_slot_free_notify(p->bdev,
offset);
}
}
unlock_cluster_or_swap_info(p, ci);
return usage;
}
/*
 * Final teardown of a swap entry whose only remaining reference is the
 * swap cache (swap_map value must be exactly SWAP_HAS_CACHE).  Clears
 * the map slot under the cluster lock, then updates device accounting
 * and notifies frontswap/the block driver.  Called with p->lock held.
 */
static void swap_entry_free(struct swap_info_struct *p, swp_entry_t entry)
{
	struct swap_cluster_info *ci;
	unsigned long offset = swp_offset(entry);
	unsigned char count;
	ci = lock_cluster(p, offset);
	count = p->swap_map[offset];
	/* Caller guarantees only the swap-cache reference remains. */
	VM_BUG_ON(count != SWAP_HAS_CACHE);
	p->swap_map[offset] = 0;
	dec_cluster_info_page(p, p->cluster_info, offset);
	unlock_cluster(ci);
	mem_cgroup_uncharge_swap(entry);
	/* Widen the [lowest_bit, highest_bit] allocation window. */
	if (offset < p->lowest_bit)
		p->lowest_bit = offset;
	if (offset > p->highest_bit) {
		bool was_full = !p->highest_bit;
		p->highest_bit = offset;
		/* Device was full: make it allocatable again. */
		if (was_full && (p->flags & SWP_WRITEOK)) {
			spin_lock(&swap_avail_lock);
			WARN_ON(!plist_node_empty(&p->avail_list));
			if (plist_node_empty(&p->avail_list))
				plist_add(&p->avail_list,
					  &swap_avail_head);
			spin_unlock(&swap_avail_lock);
		}
	}
	atomic_long_inc(&nr_swap_pages);
	p->inuse_pages--;
	frontswap_invalidate_page(p->type, offset);
	if (p->flags & SWP_BLKDEV) {
		struct gendisk *disk = p->bdev->bd_disk;
		/* Let the backing driver (e.g. zram) discard the slot. */
		if (disk->fops->swap_slot_free_notify)
			disk->fops->swap_slot_free_notify(p->bdev,
							  offset);
	}
}
/*
* Caller has made sure that the swap device corresponding to entry
* is still around or has not been recycled.
@@ -882,10 +1084,10 @@ void swap_free(swp_entry_t entry)
{
struct swap_info_struct *p;
p = swap_info_get(entry);
p = _swap_info_get(entry);
if (p) {
swap_entry_free(p, entry, 1);
spin_unlock(&p->lock);
if (!__swap_entry_free(p, entry, 1))
free_swap_slot(entry);
}
}
@@ -896,13 +1098,33 @@ void swapcache_free(swp_entry_t entry)
{
struct swap_info_struct *p;
p = swap_info_get(entry);
p = _swap_info_get(entry);
if (p) {
swap_entry_free(p, entry, SWAP_HAS_CACHE);
spin_unlock(&p->lock);
if (!__swap_entry_free(p, entry, SWAP_HAS_CACHE))
free_swap_slot(entry);
}
}
/*
 * Free a batch of @n swap entries, holding each device's lock across
 * consecutive entries on the same device (via swap_info_get_cont())
 * instead of re-locking per entry.  Entries that fail validation are
 * skipped.  No-op for n <= 0.
 */
void swapcache_free_entries(swp_entry_t *entries, int n)
{
	struct swap_info_struct *si = NULL, *last = NULL;
	int i;

	if (n <= 0)
		return;

	for (i = 0; i < n; i++) {
		si = swap_info_get_cont(entries[i], last);
		if (si)
			swap_entry_free(si, entries[i]);
		last = si;
	}

	/* Drop the lock still held on the last device touched. */
	if (si)
		spin_unlock(&si->lock);
}
/*
* How many references to page are currently swapped out?
* This does not give an exact answer when swap count is continued,
@@ -912,17 +1134,49 @@ int page_swapcount(struct page *page)
{
int count = 0;
struct swap_info_struct *p;
struct swap_cluster_info *ci;
swp_entry_t entry;
unsigned long offset;
entry.val = page_private(page);
p = swap_info_get(entry);
p = _swap_info_get(entry);
if (p) {
count = swap_count(p->swap_map[swp_offset(entry)]);
spin_unlock(&p->lock);
offset = swp_offset(entry);
ci = lock_cluster_or_swap_info(p, offset);
count = swap_count(p->swap_map[offset]);
unlock_cluster_or_swap_info(p, ci);
}
return count;
}
/*
 * Read the swap count of @entry on @si under the cluster lock (or
 * si->lock for non-SSD devices).  Does not follow COUNT_CONTINUED
 * continuation pages.
 */
static int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry)
{
	struct swap_cluster_info *ci;
	pgoff_t off = swp_offset(entry);
	int ret;

	ci = lock_cluster_or_swap_info(si, off);
	ret = swap_count(si->swap_map[off]);
	unlock_cluster_or_swap_info(si, ci);
	return ret;
}
/*
 * How many references to @entry are currently swapped out?
 * This does not give an exact answer when swap count is continued,
 * but does include the high COUNT_CONTINUED flag to allow for that.
 * Returns 0 for an invalid entry.
 */
int __swp_swapcount(swp_entry_t entry)
{
	struct swap_info_struct *si = __swap_info_get(entry);

	return si ? swap_swapcount(si, entry) : 0;
}
/*
* How many references to @entry are currently swapped out?
* This considers COUNT_CONTINUED so it returns exact answer.
@@ -931,22 +1185,26 @@ int swp_swapcount(swp_entry_t entry)
{
int count, tmp_count, n;
struct swap_info_struct *p;
struct swap_cluster_info *ci;
struct page *page;
pgoff_t offset;
unsigned char *map;
p = swap_info_get(entry);
p = _swap_info_get(entry);
if (!p)
return 0;
count = swap_count(p->swap_map[swp_offset(entry)]);
offset = swp_offset(entry);
ci = lock_cluster_or_swap_info(p, offset);
count = swap_count(p->swap_map[offset]);
if (!(count & COUNT_CONTINUED))
goto out;
count &= ~COUNT_CONTINUED;
n = SWAP_MAP_MAX + 1;
offset = swp_offset(entry);
page = vmalloc_to_page(p->swap_map + offset);
offset &= ~PAGE_MASK;
VM_BUG_ON(page_private(page) != SWP_CONTINUED);
@@ -961,7 +1219,7 @@ int swp_swapcount(swp_entry_t entry)
n *= (SWAP_CONT_MAX + 1);
} while (tmp_count & COUNT_CONTINUED);
out:
spin_unlock(&p->lock);
unlock_cluster_or_swap_info(p, ci);
return count;
}
@@ -1053,21 +1311,23 @@ int free_swap_and_cache(swp_entry_t entry)
{
struct swap_info_struct *p;
struct page *page = NULL;
unsigned char count;
if (non_swap_entry(entry))
return 1;
p = swap_info_get(entry);
p = _swap_info_get(entry);
if (p) {
if (swap_entry_free(p, entry, 1) == SWAP_HAS_CACHE) {
count = __swap_entry_free(p, entry, 1);
if (count == SWAP_HAS_CACHE) {
page = find_get_page(swap_address_space(entry),
swp_offset(entry));
if (page && !trylock_page(page)) {
put_page(page);
page = NULL;
}
}
spin_unlock(&p->lock);
} else if (!count)
free_swap_slot(entry);
}
if (page) {
/*
@@ -1075,7 +1335,8 @@ int free_swap_and_cache(swp_entry_t entry)
* Also recheck PageSwapCache now page is locked (above).
*/
if (PageSwapCache(page) && !PageWriteback(page) &&
(!page_mapped(page) || mem_cgroup_swap_full(page))) {
(!page_mapped(page) || mem_cgroup_swap_full(page)) &&
!swap_swapcount(p, entry)) {
delete_from_swap_cache(page);
SetPageDirty(page);
}
@@ -1290,6 +1551,7 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
pmd = pmd_offset(pud, addr);
do {
cond_resched();
next = pmd_addr_end(addr, end);
if (pmd_none_or_trans_huge_or_clear_bad(pmd))
continue;
@@ -1369,6 +1631,7 @@ static int unuse_mm(struct mm_struct *mm,
for (vma = mm->mmap; vma; vma = vma->vm_next) {
if (vma->anon_vma && (ret = unuse_vma(vma, entry, page)))
break;
cond_resched();
}
up_read(&mm->mmap_sem);
return (ret < 0)? ret: 0;
@@ -1406,15 +1669,12 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
prev = 0;
i = 1;
}
if (frontswap) {
if (frontswap_test(si, i))
break;
else
continue;
}
count = READ_ONCE(si->swap_map[i]);
if (count && swap_count(count) != SWAP_MAP_BAD)
break;
if (!frontswap || frontswap_test(si, i))
break;
if ((i % LATENCY_LIMIT) == 0)
cond_resched();
}
return i;
}
@@ -1896,6 +2156,17 @@ static void reinsert_swap_info(struct swap_info_struct *p)
spin_unlock(&swap_lock);
}
/*
 * Report whether any active swap device exists, sampled under
 * swap_lock (the answer may change as soon as the lock is dropped).
 */
bool has_usable_swap(void)
{
	bool usable;

	spin_lock(&swap_lock);
	usable = !plist_head_empty(&swap_active_head);
	spin_unlock(&swap_lock);
	return usable;
}
SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
{
struct swap_info_struct *p = NULL;
@@ -1966,6 +2237,8 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
spin_unlock(&p->lock);
spin_unlock(&swap_lock);
disable_swap_slots_cache_lock();
set_current_oom_origin();
err = try_to_unuse(p->type, false, 0); /* force unuse all pages */
clear_current_oom_origin();
@@ -1973,9 +2246,12 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
if (err) {
/* re-insert swap space back into swap_list */
reinsert_swap_info(p);
reenable_swap_slots_cache_unlock();
goto out_dput;
}
reenable_swap_slots_cache_unlock();
flush_work(&p->discard_work);
destroy_swap_extents(p);
@@ -2014,10 +2290,11 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
free_percpu(p->percpu_cluster);
p->percpu_cluster = NULL;
vfree(swap_map);
vfree(cluster_info);
vfree(frontswap_map);
kvfree(cluster_info);
kvfree(frontswap_map);
/* Destroy swap account information */
swap_cgroup_swapoff(p->type);
exit_swap_address_space(p->type);
inode = mapping->host;
if (S_ISBLK(inode->i_mode)) {
@@ -2359,6 +2636,13 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
return maxpages;
}
#define SWAP_CLUSTER_INFO_COLS \
DIV_ROUND_UP(L1_CACHE_BYTES, sizeof(struct swap_cluster_info))
#define SWAP_CLUSTER_SPACE_COLS \
DIV_ROUND_UP(SWAP_ADDRESS_SPACE_PAGES, SWAPFILE_CLUSTER)
#define SWAP_CLUSTER_COLS \
max_t(unsigned int, SWAP_CLUSTER_INFO_COLS, SWAP_CLUSTER_SPACE_COLS)
static int setup_swap_map_and_extents(struct swap_info_struct *p,
union swap_header *swap_header,
unsigned char *swap_map,
@@ -2366,11 +2650,12 @@ static int setup_swap_map_and_extents(struct swap_info_struct *p,
unsigned long maxpages,
sector_t *span)
{
int i;
unsigned int j, k;
unsigned int nr_good_pages;
int nr_extents;
unsigned long nr_clusters = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER);
unsigned long idx = p->cluster_next / SWAPFILE_CLUSTER;
unsigned long col = p->cluster_next / SWAPFILE_CLUSTER % SWAP_CLUSTER_COLS;
unsigned long i, idx;
nr_good_pages = maxpages - 1; /* omit header page */
@@ -2418,15 +2703,23 @@ static int setup_swap_map_and_extents(struct swap_info_struct *p,
if (!cluster_info)
return nr_extents;
for (i = 0; i < nr_clusters; i++) {
if (!cluster_count(&cluster_info[idx])) {
/*
* Reduce false cache line sharing between cluster_info and
* sharing same address space.
*/
for (k = 0; k < SWAP_CLUSTER_COLS; k++) {
j = (k + col) % SWAP_CLUSTER_COLS;
for (i = 0; i < DIV_ROUND_UP(nr_clusters, SWAP_CLUSTER_COLS); i++) {
idx = i * SWAP_CLUSTER_COLS + j;
if (idx >= nr_clusters)
continue;
if (cluster_count(&cluster_info[idx]))
continue;
cluster_set_flag(&cluster_info[idx], CLUSTER_FLAG_FREE);
cluster_list_add_tail(&p->free_clusters, cluster_info,
idx);
}
idx++;
if (idx == nr_clusters)
idx = 0;
}
return nr_extents;
}
@@ -2529,6 +2822,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) {
int cpu;
unsigned long ci, nr_cluster;
p->flags |= SWP_SOLIDSTATE;
/*
@@ -2536,13 +2830,18 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
* SSD
*/
p->cluster_next = 1 + (prandom_u32() % p->highest_bit);
nr_cluster = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER);
cluster_info = vzalloc(DIV_ROUND_UP(maxpages,
SWAPFILE_CLUSTER) * sizeof(*cluster_info));
cluster_info = kvzalloc(nr_cluster * sizeof(*cluster_info),
GFP_KERNEL);
if (!cluster_info) {
error = -ENOMEM;
goto bad_swap;
}
for (ci = 0; ci < nr_cluster; ci++)
spin_lock_init(&((cluster_info + ci)->lock));
p->percpu_cluster = alloc_percpu(struct percpu_cluster);
if (!p->percpu_cluster) {
error = -ENOMEM;
@@ -2567,7 +2866,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
}
/* frontswap enabled? set up bit-per-page map for frontswap */
if (IS_ENABLED(CONFIG_FRONTSWAP))
frontswap_map = vzalloc(BITS_TO_LONGS(maxpages) * sizeof(long));
frontswap_map = kvzalloc(BITS_TO_LONGS(maxpages) * sizeof(long),
GFP_KERNEL);
if (p->bdev &&(swap_flags & SWAP_FLAG_DISCARD) && swap_discardable(p)) {
/*
@@ -2602,6 +2902,10 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
if (p->bdev && blk_queue_fast(bdev_get_queue(p->bdev)))
p->flags |= SWP_FAST;
error = init_swap_address_space(p->type, maxpages);
if (error)
goto bad_swap;
mutex_lock(&swapon_mutex);
prio = -1;
if (swap_flags & SWAP_FLAG_PREFER) {
@@ -2642,7 +2946,8 @@ bad_swap:
p->flags = 0;
spin_unlock(&swap_lock);
vfree(swap_map);
vfree(cluster_info);
kvfree(cluster_info);
kvfree(frontswap_map);
if (swap_file) {
if (inode && S_ISREG(inode->i_mode)) {
inode_unlock(inode);
@@ -2659,6 +2964,8 @@ out:
putname(name);
if (inode && S_ISREG(inode->i_mode))
inode_unlock(inode);
if (!error)
enable_swap_slots_cache();
return error;
}
@@ -2693,6 +3000,7 @@ void si_swapinfo(struct sysinfo *val)
static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
{
struct swap_info_struct *p;
struct swap_cluster_info *ci;
unsigned long offset, type;
unsigned char count;
unsigned char has_cache;
@@ -2706,10 +3014,10 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
goto bad_file;
p = swap_info[type];
offset = swp_offset(entry);
spin_lock(&p->lock);
if (unlikely(offset >= p->max))
goto unlock_out;
goto out;
ci = lock_cluster_or_swap_info(p, offset);
count = p->swap_map[offset];
@@ -2752,7 +3060,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
p->swap_map[offset] = count | has_cache;
unlock_out:
spin_unlock(&p->lock);
unlock_cluster_or_swap_info(p, ci);
out:
return err;
@@ -2841,6 +3149,7 @@ EXPORT_SYMBOL_GPL(__page_file_index);
int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
{
struct swap_info_struct *si;
struct swap_cluster_info *ci;
struct page *head;
struct page *page;
struct page *list_page;
@@ -2864,6 +3173,9 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
}
offset = swp_offset(entry);
ci = lock_cluster(si, offset);
count = si->swap_map[offset] & ~SWAP_HAS_CACHE;
if ((count & ~COUNT_CONTINUED) != SWAP_MAP_MAX) {
@@ -2876,6 +3188,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
}
if (!page) {
unlock_cluster(ci);
spin_unlock(&si->lock);
return -ENOMEM;
}
@@ -2924,6 +3237,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
list_add_tail(&page->lru, &head->lru);
page = NULL; /* now it's attached, don't free it */
out:
unlock_cluster(ci);
spin_unlock(&si->lock);
outer:
if (page)
@@ -2937,7 +3251,8 @@ outer:
* into, carry if so, or else fail until a new continuation page is allocated;
* when the original swap_map count is decremented from 0 with continuation,
* borrow from the continuation and report whether it still holds more.
* Called while __swap_duplicate() or swap_entry_free() holds swap_lock.
* Called while __swap_duplicate() or swap_entry_free() holds swap or cluster
* lock.
*/
static bool swap_count_continued(struct swap_info_struct *si,
pgoff_t offset, unsigned char count)

View File

@@ -1591,6 +1591,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
}
}
prefetch(&sk->sk_rmem_alloc);
if (rcu_access_pointer(sk->sk_filter) &&
udp_lib_checksum_complete(skb))
goto csum_error;

View File

@@ -612,6 +612,7 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
}
}
prefetch(&sk->sk_rmem_alloc);
if (rcu_access_pointer(sk->sk_filter) &&
udp_lib_checksum_complete(skb))
goto csum_error;