Merge: Performance improvements.
This patchset brings some performance improvements and adds the LZO-RLE
compression algorithm to the kernel; it is also usable in zram (yes, tested and
working, though LZ4 is still fine for us).
The main improvement is to SWAP space: the locking has changed and the swap
cache is now split into 64MB chunks.
This cuts the median page fault latency from 15us to 4us (roughly a 3.75x
reduction) and improves swap throughput by 192% (this includes "virtual" swap
devices, like zRAM!). The real-world benefit on a mobile device shows up after
a day or two of usage, when the phone usually starts losing a little
performance due to the large number of apps kept open in the background: I can
no longer notice any performance loss, and the user experience is basically the
same as if the phone were in its first 2 hours after boot.
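For reference, the "64MB chunks" come from the new per-device arrays of swap
cache address spaces introduced further down in this diff: each space covers
1 << SWAP_ADDRESS_SPACE_SHIFT (16384) pages, i.e. 64MB of 4KB pages, and a swap
entry's offset selects the space (and therefore the lock and radix tree) it
lives in. A minimal sketch of that mapping, illustration only and simplified
from the swap_address_space() macro added below:

#define SWAP_ADDRESS_SPACE_SHIFT 14   /* 16384 pages * 4KB = 64MB per space */
#define SWAP_ADDRESS_SPACE_PAGES (1UL << SWAP_ADDRESS_SPACE_SHIFT)

/* Index of the swap cache address space that a given swap offset falls
 * into; one address space (with its own lock) per 64MB of swap. */
static unsigned long swap_space_index(unsigned long swp_offset)
{
        return swp_offset >> SWAP_ADDRESS_SPACE_SHIFT;
}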
Other performance improvements, in short:
UDP v4/v6: about 10% more performance on a single RX queue
Userspace applications are faster when checking the running time of threads
2-5% improvement on multiply-heavy workloads (yeah, not a lot, but it was
totally free...)
Improvements to rare conditions during sparse truncate, from about 0.3% up to
around 20% in far rarer cases (that's basically never going to happen, but
there is no performance drop anywhere).
Tested on SoMC Tama Akatsuki RoW
This was taken from:
Repo: https://github.com/sonyxperiadev/kernel
PR: 2039 ([2.3.2.r1.4] Performance improvements)
Committed by: DhineshCool
Parent: 2e6317f80a
Commit: 1ad6cc0c62
@@ -73,6 +73,10 @@ Description
They just have to "refill" this credit if they consume extra bytes. This is
an implementation design choice independent on the algorithm or encoding.

For maximum compatibility, both versions are available under different names
(lzo and lzo-rle). Differences in the encoding are noted in this document with
e.g.: version 1 only.

Byte sequences

First byte encoding :
@@ -134,6 +138,11 @@ Byte sequences
      state = S (copy S literals after this block)
      End of stream is reached if distance == 16384

      In version 1 only, this instruction is also used to encode a run of
      zeros if distance = 0xbfff, i.e. H = 1 and the D bits are all 1.
      In this case, it is followed by a fourth byte, X.
      run length = ((X << 3) | (0 0 0 0 0 L L L)) + 4.

0 0 1 L L L L L  (32..63)
      Copy of small block within 16kB distance (preferably less than 34B)
      length = 2 + (L ?: 31 + (zero_bytes * 255) + non_zero_byte)
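To make the new zero-run instruction concrete, here is a small illustrative
helper (not part of the patch) that applies the documented formula above,
rebuilding the run length from the three low L bits of the instruction byte
and the extra byte X:

/* Illustration only: decode the version 1 zero-run length,
 * run length = ((X << 3) | (0 0 0 0 0 L L L)) + 4. */
static unsigned int lzo_rle_run_length(unsigned char instr, unsigned char x)
{
        unsigned int l = instr & 0x07;  /* the three low L bits */

        return (((unsigned int)x << 3) | l) + 4;
}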
@@ -158,7 +167,9 @@ Byte sequences
Authors

This document was written by Willy Tarreau <w@1wt.eu> on 2014/07/19 during an
analysis of the decompression code available in Linux 3.16-rc5. The code is
tricky, it is possible that this document contains mistakes or that a few
corner cases were overlooked. In any case, please report any doubt, fix, or
proposed updates to the author(s) so that the document can be updated.
analysis of the decompression code available in Linux 3.16-rc5, and updated
by Dave Rodgman <dave.rodgman@arm.com> on 2018/10/30 to introduce run-length
encoding. The code is tricky, it is possible that this document contains
mistakes or that a few corner cases were overlooked. In any case, please
report any doubt, fix, or proposed updates to the author(s) so that the
document can be updated.

@@ -9,6 +9,7 @@ config ARM64
        select ARCH_HAS_DEVMEM_IS_ALLOWED
        select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
        select ARCH_HAS_ELF_RANDOMIZE
        select ARCH_HAS_FAST_MULTIPLIER
        select ARCH_HAS_FORTIFY_SOURCE
        select ARCH_HAS_GCOV_PROFILE_ALL
        select ARCH_HAS_GIGANTIC_PAGE

@@ -25,6 +25,7 @@
#include <asm/virt.h>

#include <linux/acpi.h>
#include <linux/clocksource.h>
#include <linux/of.h>
#include <linux/perf/arm_pmu.h>
#include <linux/platform_device.h>
@@ -1233,3 +1234,32 @@ static struct platform_driver armv8_pmu_driver = {
};

builtin_platform_driver(armv8_pmu_driver);

void arch_perf_update_userpage(struct perf_event *event,
                               struct perf_event_mmap_page *userpg, u64 now)
{
        u32 freq;
        u32 shift;

        /*
         * Internal timekeeping for enabled/running/stopped times
         * is always computed with the sched_clock.
         */
        freq = arch_timer_get_rate();
        userpg->cap_user_time = 1;

        clocks_calc_mult_shift(&userpg->time_mult, &shift, freq,
                        NSEC_PER_SEC, 0);
        /*
         * time_shift is not expected to be greater than 31 due to
         * the original published conversion algorithm shifting a
         * 32-bit value (now specifies a 64-bit value) - refer
         * perf_event_mmap_page documentation in perf_event.h.
         */
        if (shift == 32) {
                shift = 31;
                userpg->time_mult >>= 1;
        }
        userpg->time_shift = (u16)shift;
        userpg->time_offset = -now;
}

@@ -121,7 +121,7 @@ obj-$(CONFIG_CRYPTO_CRC32C) += crc32c_generic.o
obj-$(CONFIG_CRYPTO_CRC32) += crc32_generic.o
obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif_common.o crct10dif_generic.o
obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o
obj-$(CONFIG_CRYPTO_LZO) += lzo.o
obj-$(CONFIG_CRYPTO_LZO) += lzo.o lzo-rle.o
obj-$(CONFIG_CRYPTO_LZ4) += lz4.o
obj-$(CONFIG_CRYPTO_LZ4HC) += lz4hc.o
obj-$(CONFIG_CRYPTO_842) += 842.o

crypto/lzo-rle.c (new file, 175 lines)
@@ -0,0 +1,175 @@
/*
 * Cryptographic API.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 51
 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/crypto.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/lzo.h>
#include <crypto/internal/scompress.h>

struct lzorle_ctx {
        void *lzorle_comp_mem;
};

static void *lzorle_alloc_ctx(struct crypto_scomp *tfm)
{
        void *ctx;

        ctx = kvmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
        if (!ctx)
                return ERR_PTR(-ENOMEM);

        return ctx;
}

static int lzorle_init(struct crypto_tfm *tfm)
{
        struct lzorle_ctx *ctx = crypto_tfm_ctx(tfm);

        ctx->lzorle_comp_mem = lzorle_alloc_ctx(NULL);
        if (IS_ERR(ctx->lzorle_comp_mem))
                return -ENOMEM;

        return 0;
}

static void lzorle_free_ctx(struct crypto_scomp *tfm, void *ctx)
{
        kvfree(ctx);
}

static void lzorle_exit(struct crypto_tfm *tfm)
{
        struct lzorle_ctx *ctx = crypto_tfm_ctx(tfm);

        lzorle_free_ctx(NULL, ctx->lzorle_comp_mem);
}

static int __lzorle_compress(const u8 *src, unsigned int slen,
                             u8 *dst, unsigned int *dlen, void *ctx)
{
        size_t tmp_len = *dlen; /* size_t(ulong) <-> uint on 64 bit */
        int err;

        err = lzorle1x_1_compress(src, slen, dst, &tmp_len, ctx);

        if (err != LZO_E_OK)
                return -EINVAL;

        *dlen = tmp_len;
        return 0;
}

static int lzorle_compress(struct crypto_tfm *tfm, const u8 *src,
                           unsigned int slen, u8 *dst, unsigned int *dlen)
{
        struct lzorle_ctx *ctx = crypto_tfm_ctx(tfm);

        return __lzorle_compress(src, slen, dst, dlen, ctx->lzorle_comp_mem);
}

static int lzorle_scompress(struct crypto_scomp *tfm, const u8 *src,
                            unsigned int slen, u8 *dst, unsigned int *dlen,
                            void *ctx)
{
        return __lzorle_compress(src, slen, dst, dlen, ctx);
}

static int __lzorle_decompress(const u8 *src, unsigned int slen,
                               u8 *dst, unsigned int *dlen)
{
        int err;
        size_t tmp_len = *dlen; /* size_t(ulong) <-> uint on 64 bit */

        err = lzo1x_decompress_safe(src, slen, dst, &tmp_len);

        if (err != LZO_E_OK)
                return -EINVAL;

        *dlen = tmp_len;
        return 0;
}

static int lzorle_decompress(struct crypto_tfm *tfm, const u8 *src,
                             unsigned int slen, u8 *dst, unsigned int *dlen)
{
        return __lzorle_decompress(src, slen, dst, dlen);
}

static int lzorle_sdecompress(struct crypto_scomp *tfm, const u8 *src,
                              unsigned int slen, u8 *dst, unsigned int *dlen,
                              void *ctx)
{
        return __lzorle_decompress(src, slen, dst, dlen);
}

static struct crypto_alg alg = {
        .cra_name = "lzo-rle",
        .cra_flags = CRYPTO_ALG_TYPE_COMPRESS,
        .cra_ctxsize = sizeof(struct lzorle_ctx),
        .cra_module = THIS_MODULE,
        .cra_init = lzorle_init,
        .cra_exit = lzorle_exit,
        .cra_u = { .compress = {
        .coa_compress = lzorle_compress,
        .coa_decompress = lzorle_decompress } }
};

static struct scomp_alg scomp = {
        .alloc_ctx = lzorle_alloc_ctx,
        .free_ctx = lzorle_free_ctx,
        .compress = lzorle_scompress,
        .decompress = lzorle_sdecompress,
        .base = {
                .cra_name = "lzo-rle",
                .cra_driver_name = "lzo-rle-scomp",
                .cra_module = THIS_MODULE,
        }
};

static int __init lzorle_mod_init(void)
{
        int ret;

        ret = crypto_register_alg(&alg);
        if (ret)
                return ret;

        ret = crypto_register_scomp(&scomp);
        if (ret) {
                crypto_unregister_alg(&alg);
                return ret;
        }

        return ret;
}

static void __exit lzorle_mod_fini(void)
{
        crypto_unregister_alg(&alg);
        crypto_unregister_scomp(&scomp);
}

module_init(lzorle_mod_init);
module_exit(lzorle_mod_fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("LZO-RLE Compression Algorithm");
MODULE_ALIAS_CRYPTO("lzo-rle");
@@ -73,7 +73,8 @@ static char *check[] = {
        "cast6", "arc4", "michael_mic", "deflate", "crc32c", "tea", "xtea",
        "khazad", "wp512", "wp384", "wp256", "tnepres", "xeta", "fcrypt",
        "camellia", "seed", "salsa20", "rmd128", "rmd160", "rmd256", "rmd320",
        "lzo", "cts", "zlib", "sha3-224", "sha3-256", "sha3-384", "sha3-512",
        "lzo", "lzo-rle", "cts", "zlib", "sha3-224", "sha3-256", "sha3-384",
        "sha3-512",
        NULL
};

@@ -20,6 +20,7 @@

static const char * const backends[] = {
        "lzo",
        "lzo-rle",
#if IS_ENABLED(CONFIG_CRYPTO_LZ4)
        "lz4",
#endif

@@ -17,12 +17,16 @@
#define LZO1X_1_MEM_COMPRESS (8192 * sizeof(unsigned short))
#define LZO1X_MEM_COMPRESS LZO1X_1_MEM_COMPRESS

#define lzo1x_worst_compress(x) ((x) + ((x) / 16) + 64 + 3)
#define lzo1x_worst_compress(x) ((x) + ((x) / 16) + 64 + 3 + 2)

/* This requires 'wrkmem' of size LZO1X_1_MEM_COMPRESS */
int lzo1x_1_compress(const unsigned char *src, size_t src_len,
                     unsigned char *dst, size_t *dst_len, void *wrkmem);

/* This requires 'wrkmem' of size LZO1X_1_MEM_COMPRESS */
int lzorle1x_1_compress(const unsigned char *src, size_t src_len,
                        unsigned char *dst, size_t *dst_len, void *wrkmem);

/* safe decompression with overrun testing */
int lzo1x_decompress_safe(const unsigned char *src, size_t src_len,
                          unsigned char *dst, size_t *dst_len);

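The extra "+ 2" in the worst-case bound accounts for the two-byte bitstream
version header that the RLE variant writes at the start of its output. A
hedged usage sketch (not from the patch) for sizing a destination buffer with
the updated bound:

/* Worst-case destination size for an n-byte input, mirroring the updated
 * lzo1x_worst_compress() macro; the trailing +2 covers the version header. */
static size_t lzo_rle_worst_dst_size(size_t n)
{
        return n + n / 16 + 64 + 3 + 2;
}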
@@ -16,7 +16,7 @@ struct address_space;

struct pagevec {
        unsigned char nr;
        unsigned long cold;
        bool cold;
        bool drained;
        struct page *pages[PAGEVEC_SIZE];
};

@@ -45,6 +46,7 @@ static inline void pagevec_init(struct pagevec *pvec, int cold)
{
        pvec->nr = 0;
        pvec->cold = cold;
        pvec->drained = false;
}

static inline void pagevec_reinit(struct pagevec *pvec)

@@ -27,6 +27,7 @@ struct bio;
#define SWAP_FLAGS_VALID (SWAP_FLAG_PRIO_MASK | SWAP_FLAG_PREFER | \
                          SWAP_FLAG_DISCARD | SWAP_FLAG_DISCARD_ONCE | \
                          SWAP_FLAG_DISCARD_PAGES)
#define SWAP_BATCH 64

static inline int current_is_kswapd(void)
{
@@ -178,6 +179,12 @@ enum {
 * protected by swap_info_struct.lock.
 */
struct swap_cluster_info {
        spinlock_t lock;        /*
                                 * Protect swap_cluster_info fields
                                 * and swap_info_struct->swap_map
                                 * elements correspond to the swap
                                 * cluster
                                 */
        unsigned int data:24;
        unsigned int flags:8;
};
@@ -365,8 +372,13 @@ int generic_swapfile_activate(struct swap_info_struct *, struct file *,
                sector_t *);

/* linux/mm/swap_state.c */
extern struct address_space swapper_spaces[];
#define swap_address_space(entry) (&swapper_spaces[swp_type(entry)])
/* One swap address space for each 64M swap space */
#define SWAP_ADDRESS_SPACE_SHIFT 14
#define SWAP_ADDRESS_SPACE_PAGES (1 << SWAP_ADDRESS_SPACE_SHIFT)
extern struct address_space *swapper_spaces[];
#define swap_address_space(entry) \
        (&swapper_spaces[swp_type(entry)][swp_offset(entry) \
                >> SWAP_ADDRESS_SPACE_SHIFT])
extern unsigned long total_swapcache_pages(void);
extern void show_swap_cache_info(void);
extern int add_to_swap(struct page *, struct list_head *list);
@@ -389,6 +401,7 @@ extern struct page *swapin_readahead(swp_entry_t, gfp_t,
extern atomic_long_t nr_swap_pages;
extern long total_swap_pages;
extern bool is_swap_fast(swp_entry_t entry);
extern bool has_usable_swap(void);

/* Swap 50% full? Release swapcache more aggressively.. */
static inline bool vm_swap_full(struct swap_info_struct *si)
@@ -411,23 +424,31 @@ static inline long get_nr_swap_pages(void)
extern void si_swapinfo(struct sysinfo *);
extern swp_entry_t get_swap_page(void);
extern swp_entry_t get_swap_page_of_type(int);
extern int get_swap_pages(int n, swp_entry_t swp_entries[]);
extern int add_swap_count_continuation(swp_entry_t, gfp_t);
extern void swap_shmem_alloc(swp_entry_t);
extern int swap_duplicate(swp_entry_t);
extern int swapcache_prepare(swp_entry_t);
extern void swap_free(swp_entry_t);
extern void swapcache_free(swp_entry_t);
extern void swapcache_free_entries(swp_entry_t *entries, int n);
extern int free_swap_and_cache(swp_entry_t);
extern int swap_type_of(dev_t, sector_t, struct block_device **);
extern unsigned int count_swap_pages(int, int);
extern sector_t map_swap_page(struct page *, struct block_device **);
extern sector_t swapdev_block(int, pgoff_t);
extern int page_swapcount(struct page *);
extern int __swp_swapcount(swp_entry_t entry);
extern int swp_swapcount(swp_entry_t entry);
extern struct swap_info_struct *page_swap_info(struct page *);
extern bool reuse_swap_page(struct page *, int *);
extern int try_to_free_swap(struct page *);
struct backing_dev_info;
extern int init_swap_address_space(unsigned int type, unsigned long nr_pages);
extern void exit_swap_address_space(unsigned int type);

extern int get_swap_slots(int n, swp_entry_t *slots);
extern void swapcache_free_batch(swp_entry_t *entries, int n);

#else /* CONFIG_SWAP */

@@ -515,6 +536,11 @@ static inline int page_swapcount(struct page *page)
        return 0;
}

static inline int __swp_swapcount(swp_entry_t entry)
{
        return 0;
}

static inline int swp_swapcount(swp_entry_t entry)
{
        return 0;

include/linux/swap_slots.h (new file, 30 lines)
@@ -0,0 +1,30 @@
#ifndef _LINUX_SWAP_SLOTS_H
#define _LINUX_SWAP_SLOTS_H

#include <linux/swap.h>
#include <linux/spinlock.h>
#include <linux/mutex.h>

#define SWAP_SLOTS_CACHE_SIZE SWAP_BATCH
#define THRESHOLD_ACTIVATE_SWAP_SLOTS_CACHE (5*SWAP_SLOTS_CACHE_SIZE)
#define THRESHOLD_DEACTIVATE_SWAP_SLOTS_CACHE (2*SWAP_SLOTS_CACHE_SIZE)

struct swap_slots_cache {
        bool lock_initialized;
        struct mutex alloc_lock; /* protects slots, nr, cur */
        swp_entry_t *slots;
        int nr;
        int cur;
        spinlock_t free_lock; /* protects slots_ret, n_ret */
        swp_entry_t *slots_ret;
        int n_ret;
};

void disable_swap_slots_cache_lock(void);
void reenable_swap_slots_cache_unlock(void);
int enable_swap_slots_cache(void);
int free_swap_slot(swp_entry_t entry);

extern bool swap_slot_cache_enabled;

#endif /* _LINUX_SWAP_SLOTS_H */
@@ -5075,8 +5075,8 @@ void perf_event_update_userpage(struct perf_event *event)

        userpg = rb->user_page;
        /*
         * Disable preemption so as to not let the corresponding user-space
         * spin too long if we get preempted.
         * Disable preemption to guarantee consistent time stamps are stored to
         * the user page.
         */
        preempt_disable();
        ++userpg->lock;

@@ -20,7 +20,8 @@
|
||||
static noinline size_t
|
||||
lzo1x_1_do_compress(const unsigned char *in, size_t in_len,
|
||||
unsigned char *out, size_t *out_len,
|
||||
size_t ti, void *wrkmem)
|
||||
size_t ti, void *wrkmem, signed char *state_offset,
|
||||
const unsigned char bitstream_version)
|
||||
{
|
||||
const unsigned char *ip;
|
||||
unsigned char *op;
|
||||
@@ -35,27 +36,85 @@ lzo1x_1_do_compress(const unsigned char *in, size_t in_len,
|
||||
ip += ti < 4 ? 4 - ti : 0;
|
||||
|
||||
for (;;) {
|
||||
const unsigned char *m_pos;
|
||||
const unsigned char *m_pos = NULL;
|
||||
size_t t, m_len, m_off;
|
||||
u32 dv;
|
||||
u32 run_length = 0;
|
||||
literal:
|
||||
ip += 1 + ((ip - ii) >> 5);
|
||||
next:
|
||||
if (unlikely(ip >= ip_end))
|
||||
break;
|
||||
dv = get_unaligned_le32(ip);
|
||||
t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK;
|
||||
m_pos = in + dict[t];
|
||||
dict[t] = (lzo_dict_t) (ip - in);
|
||||
if (unlikely(dv != get_unaligned_le32(m_pos)))
|
||||
goto literal;
|
||||
|
||||
if (dv == 0 && bitstream_version) {
|
||||
const unsigned char *ir = ip + 4;
|
||||
const unsigned char *limit = ip_end
|
||||
< (ip + MAX_ZERO_RUN_LENGTH + 1)
|
||||
? ip_end : ip + MAX_ZERO_RUN_LENGTH + 1;
|
||||
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && \
|
||||
defined(LZO_FAST_64BIT_MEMORY_ACCESS)
|
||||
u64 dv64;
|
||||
|
||||
for (; (ir + 32) <= limit; ir += 32) {
|
||||
dv64 = get_unaligned((u64 *)ir);
|
||||
dv64 |= get_unaligned((u64 *)ir + 1);
|
||||
dv64 |= get_unaligned((u64 *)ir + 2);
|
||||
dv64 |= get_unaligned((u64 *)ir + 3);
|
||||
if (dv64)
|
||||
break;
|
||||
}
|
||||
for (; (ir + 8) <= limit; ir += 8) {
|
||||
dv64 = get_unaligned((u64 *)ir);
|
||||
if (dv64) {
|
||||
# if defined(__LITTLE_ENDIAN)
|
||||
ir += __builtin_ctzll(dv64) >> 3;
|
||||
# elif defined(__BIG_ENDIAN)
|
||||
ir += __builtin_clzll(dv64) >> 3;
|
||||
# else
|
||||
# error "missing endian definition"
|
||||
# endif
|
||||
break;
|
||||
}
|
||||
}
|
||||
#else
|
||||
while ((ir < (const unsigned char *)
|
||||
ALIGN((uintptr_t)ir, 4)) &&
|
||||
(ir < limit) && (*ir == 0))
|
||||
ir++;
|
||||
for (; (ir + 4) <= limit; ir += 4) {
|
||||
dv = *((u32 *)ir);
|
||||
if (dv) {
|
||||
# if defined(__LITTLE_ENDIAN)
|
||||
ir += __builtin_ctz(dv) >> 3;
|
||||
# elif defined(__BIG_ENDIAN)
|
||||
ir += __builtin_clz(dv) >> 3;
|
||||
# else
|
||||
# error "missing endian definition"
|
||||
# endif
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
while (likely(ir < limit) && unlikely(*ir == 0))
|
||||
ir++;
|
||||
run_length = ir - ip;
|
||||
if (run_length > MAX_ZERO_RUN_LENGTH)
|
||||
run_length = MAX_ZERO_RUN_LENGTH;
|
||||
} else {
|
||||
t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK;
|
||||
m_pos = in + dict[t];
|
||||
dict[t] = (lzo_dict_t) (ip - in);
|
||||
if (unlikely(dv != get_unaligned_le32(m_pos)))
|
||||
goto literal;
|
||||
}
|
||||
|
||||
ii -= ti;
|
||||
ti = 0;
|
||||
t = ip - ii;
|
||||
if (t != 0) {
|
||||
if (t <= 3) {
|
||||
op[-2] |= t;
|
||||
op[*state_offset] |= t;
|
||||
COPY4(op, ii);
|
||||
op += t;
|
||||
} else if (t <= 16) {
|
||||
@@ -88,6 +147,17 @@ next:
|
||||
}
|
||||
}
|
||||
|
||||
if (unlikely(run_length)) {
|
||||
ip += run_length;
|
||||
run_length -= MIN_ZERO_RUN_LENGTH;
|
||||
put_unaligned_le32((run_length << 21) | 0xfffc18
|
||||
| (run_length & 0x7), op);
|
||||
op += 4;
|
||||
run_length = 0;
|
||||
*state_offset = -3;
|
||||
goto finished_writing_instruction;
|
||||
}
|
||||
|
||||
m_len = 4;
|
||||
{
|
||||
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ64)
|
||||
@@ -170,7 +240,6 @@ m_len_done:
|
||||
|
||||
m_off = ip - m_pos;
|
||||
ip += m_len;
|
||||
ii = ip;
|
||||
if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) {
|
||||
m_off -= 1;
|
||||
*op++ = (((m_len - 1) << 5) | ((m_off & 7) << 2));
|
||||
@@ -207,29 +276,48 @@ m_len_done:
|
||||
*op++ = (m_off << 2);
|
||||
*op++ = (m_off >> 6);
|
||||
}
|
||||
*state_offset = -2;
|
||||
finished_writing_instruction:
|
||||
ii = ip;
|
||||
goto next;
|
||||
}
|
||||
*out_len = op - out;
|
||||
return in_end - (ii - ti);
|
||||
}
|
||||
|
||||
int lzo1x_1_compress(const unsigned char *in, size_t in_len,
|
||||
int lzogeneric1x_1_compress(const unsigned char *in, size_t in_len,
|
||||
unsigned char *out, size_t *out_len,
|
||||
void *wrkmem)
|
||||
void *wrkmem, const unsigned char bitstream_version)
|
||||
{
|
||||
const unsigned char *ip = in;
|
||||
unsigned char *op = out;
|
||||
unsigned char *data_start;
|
||||
size_t l = in_len;
|
||||
size_t t = 0;
|
||||
signed char state_offset = -2;
|
||||
unsigned int m4_max_offset;
|
||||
|
||||
// LZO v0 will never write 17 as first byte (except for zero-length
|
||||
// input), so this is used to version the bitstream
|
||||
if (bitstream_version > 0) {
|
||||
*op++ = 17;
|
||||
*op++ = bitstream_version;
|
||||
m4_max_offset = M4_MAX_OFFSET_V1;
|
||||
} else {
|
||||
m4_max_offset = M4_MAX_OFFSET_V0;
|
||||
}
|
||||
|
||||
data_start = op;
|
||||
|
||||
while (l > 20) {
|
||||
size_t ll = l <= (M4_MAX_OFFSET + 1) ? l : (M4_MAX_OFFSET + 1);
|
||||
size_t ll = l <= (m4_max_offset + 1) ? l : (m4_max_offset + 1);
|
||||
uintptr_t ll_end = (uintptr_t) ip + ll;
|
||||
if ((ll_end + ((t + ll) >> 5)) <= ll_end)
|
||||
break;
|
||||
BUILD_BUG_ON(D_SIZE * sizeof(lzo_dict_t) > LZO1X_1_MEM_COMPRESS);
|
||||
memset(wrkmem, 0, D_SIZE * sizeof(lzo_dict_t));
|
||||
t = lzo1x_1_do_compress(ip, ll, op, out_len, t, wrkmem);
|
||||
t = lzo1x_1_do_compress(ip, ll, op, out_len, t, wrkmem,
|
||||
&state_offset, bitstream_version);
|
||||
ip += ll;
|
||||
op += *out_len;
|
||||
l -= ll;
|
||||
@@ -239,10 +327,10 @@ int lzo1x_1_compress(const unsigned char *in, size_t in_len,
|
||||
if (t > 0) {
|
||||
const unsigned char *ii = in + in_len - t;
|
||||
|
||||
if (op == out && t <= 238) {
|
||||
if (op == data_start && t <= 238) {
|
||||
*op++ = (17 + t);
|
||||
} else if (t <= 3) {
|
||||
op[-2] |= t;
|
||||
op[state_offset] |= t;
|
||||
} else if (t <= 18) {
|
||||
*op++ = (t - 3);
|
||||
} else {
|
||||
@@ -273,7 +361,24 @@ int lzo1x_1_compress(const unsigned char *in, size_t in_len,
|
||||
*out_len = op - out;
|
||||
return LZO_E_OK;
|
||||
}
|
||||
|
||||
int lzo1x_1_compress(const unsigned char *in, size_t in_len,
|
||||
unsigned char *out, size_t *out_len,
|
||||
void *wrkmem)
|
||||
{
|
||||
return lzogeneric1x_1_compress(in, in_len, out, out_len, wrkmem, 0);
|
||||
}
|
||||
|
||||
int lzorle1x_1_compress(const unsigned char *in, size_t in_len,
|
||||
unsigned char *out, size_t *out_len,
|
||||
void *wrkmem)
|
||||
{
|
||||
return lzogeneric1x_1_compress(in, in_len, out, out_len,
|
||||
wrkmem, LZO_VERSION);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(lzo1x_1_compress);
|
||||
EXPORT_SYMBOL_GPL(lzorle1x_1_compress);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("LZO1X-1 Compressor");
|
||||
|
||||
@@ -46,11 +46,21 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
|
||||
const unsigned char * const ip_end = in + in_len;
|
||||
unsigned char * const op_end = out + *out_len;
|
||||
|
||||
unsigned char bitstream_version;
|
||||
|
||||
op = out;
|
||||
ip = in;
|
||||
|
||||
if (unlikely(in_len < 3))
|
||||
goto input_overrun;
|
||||
|
||||
if (likely(in_len >= 5) && likely(*ip == 17)) {
|
||||
bitstream_version = ip[1];
|
||||
ip += 2;
|
||||
} else {
|
||||
bitstream_version = 0;
|
||||
}
|
||||
|
||||
if (*ip > 17) {
|
||||
t = *ip++ - 17;
|
||||
if (t < 4) {
|
||||
@@ -154,32 +164,49 @@ copy_literal_run:
|
||||
m_pos -= next >> 2;
|
||||
next &= 3;
|
||||
} else {
|
||||
m_pos = op;
|
||||
m_pos -= (t & 8) << 11;
|
||||
t = (t & 7) + (3 - 1);
|
||||
if (unlikely(t == 2)) {
|
||||
size_t offset;
|
||||
const unsigned char *ip_last = ip;
|
||||
|
||||
while (unlikely(*ip == 0)) {
|
||||
ip++;
|
||||
NEED_IP(1);
|
||||
}
|
||||
offset = ip - ip_last;
|
||||
if (unlikely(offset > MAX_255_COUNT))
|
||||
return LZO_E_ERROR;
|
||||
|
||||
offset = (offset << 8) - offset;
|
||||
t += offset + 7 + *ip++;
|
||||
NEED_IP(2);
|
||||
}
|
||||
NEED_IP(2);
|
||||
next = get_unaligned_le16(ip);
|
||||
ip += 2;
|
||||
m_pos -= next >> 2;
|
||||
next &= 3;
|
||||
if (m_pos == op)
|
||||
goto eof_found;
|
||||
m_pos -= 0x4000;
|
||||
if (((next & 0xfffc) == 0xfffc) &&
|
||||
((t & 0xf8) == 0x18) &&
|
||||
likely(bitstream_version)) {
|
||||
NEED_IP(3);
|
||||
t &= 7;
|
||||
t |= ip[2] << 3;
|
||||
t += MIN_ZERO_RUN_LENGTH;
|
||||
NEED_OP(t);
|
||||
memset(op, 0, t);
|
||||
op += t;
|
||||
next &= 3;
|
||||
ip += 3;
|
||||
goto match_next;
|
||||
} else {
|
||||
m_pos = op;
|
||||
m_pos -= (t & 8) << 11;
|
||||
t = (t & 7) + (3 - 1);
|
||||
if (unlikely(t == 2)) {
|
||||
size_t offset;
|
||||
const unsigned char *ip_last = ip;
|
||||
|
||||
while (unlikely(*ip == 0)) {
|
||||
ip++;
|
||||
NEED_IP(1);
|
||||
}
|
||||
offset = ip - ip_last;
|
||||
if (unlikely(offset > MAX_255_COUNT))
|
||||
return LZO_E_ERROR;
|
||||
|
||||
offset = (offset << 8) - offset;
|
||||
t += offset + 7 + *ip++;
|
||||
NEED_IP(2);
|
||||
next = get_unaligned_le16(ip);
|
||||
}
|
||||
ip += 2;
|
||||
m_pos -= next >> 2;
|
||||
next &= 3;
|
||||
if (m_pos == op)
|
||||
goto eof_found;
|
||||
m_pos -= 0x4000;
|
||||
}
|
||||
}
|
||||
TEST_LB(m_pos);
|
||||
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
|
||||
|
||||
@@ -1,3 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * lzodefs.h -- architecture, OS and compiler specific defines
 *
@@ -12,9 +13,15 @@
 */


/* Version
 * 0: original lzo version
 * 1: lzo with support for RLE
 */
#define LZO_VERSION 1

#define COPY4(dst, src) \
        put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst))
#if defined(__x86_64__)
#if defined(CONFIG_X86_64) || defined(CONFIG_ARM64)
#define COPY8(dst, src) \
        put_unaligned(get_unaligned((const u64 *)(src)), (u64 *)(dst))
#else
@@ -24,19 +31,21 @@

#if defined(__BIG_ENDIAN) && defined(__LITTLE_ENDIAN)
#error "conflicting endian definitions"
#elif defined(__x86_64__)
#elif defined(CONFIG_X86_64) || defined(CONFIG_ARM64)
#define LZO_USE_CTZ64 1
#define LZO_USE_CTZ32 1
#elif defined(__i386__) || defined(__powerpc__)
#define LZO_FAST_64BIT_MEMORY_ACCESS
#elif defined(CONFIG_X86) || defined(CONFIG_PPC)
#define LZO_USE_CTZ32 1
#elif defined(__arm__) && (__LINUX_ARM_ARCH__ >= 5)
#elif defined(CONFIG_ARM) && (__LINUX_ARM_ARCH__ >= 5)
#define LZO_USE_CTZ32 1
#endif

#define M1_MAX_OFFSET 0x0400
#define M2_MAX_OFFSET 0x0800
#define M3_MAX_OFFSET 0x4000
#define M4_MAX_OFFSET 0xbfff
#define M4_MAX_OFFSET_V0 0xbfff
#define M4_MAX_OFFSET_V1 0xbffe

#define M1_MIN_LEN 2
#define M1_MAX_LEN 2
@@ -52,6 +61,9 @@
#define M3_MARKER 32
#define M4_MARKER 16

#define MIN_ZERO_RUN_LENGTH 4
#define MAX_ZERO_RUN_LENGTH (2047 + MIN_ZERO_RUN_LENGTH)

#define lzo_dict_t unsigned short
#define D_BITS 13
#define D_SIZE (1u << D_BITS)

@@ -38,7 +38,7 @@ obj-y := filemap.o mempool.o oom_kill.o \
                           readahead.o swap.o truncate.o vmscan.o shmem.o \
                           util.o mmzone.o vmstat.o backing-dev.o \
                           mm_init.o mmu_context.o percpu.o slab_common.o \
                           compaction.o vmacache.o \
                           compaction.o vmacache.o swap_slots.o \
                           interval_tree.o list_lru.o workingset.o \
                           debug.o $(mmu-y) showmem.o vmpressure.o

mm/swap.c (12 changed lines)
@@ -810,7 +810,10 @@ EXPORT_SYMBOL(release_pages);
 */
void __pagevec_release(struct pagevec *pvec)
{
        lru_add_drain();
        if (!pvec->drained) {
                lru_add_drain();
                pvec->drained = true;
        }
        release_pages(pvec->pages, pagevec_count(pvec), pvec->cold);
        pagevec_reinit(pvec);
}
@@ -981,13 +984,6 @@ EXPORT_SYMBOL(pagevec_lookup_range_nr_tag);
 */
void __init swap_setup(void)
{
#ifdef CONFIG_SWAP
        int i;

        for (i = 0; i < MAX_SWAPFILES; i++)
                spin_lock_init(&swapper_spaces[i].tree_lock);
#endif

        /*
         * Right now other parts of the system means that we
         * _really_ don't want to cluster much more

@@ -17,8 +17,9 @@

#define SWAP_RATIO_GROUP_START (SWAP_FLAG_PRIO_MASK - 9) /* 32758 */
#define SWAP_RATIO_GROUP_END (SWAP_FLAG_PRIO_MASK) /* 32767 */
#define SWAP_FAST_WRITES (SWAPFILE_CLUSTER * (SWAP_CLUSTER_MAX / 8))
#define SWAP_SLOW_WRITES SWAPFILE_CLUSTER
#define SWAP_FAST_WRITES \
        ((SWAPFILE_CLUSTER * (SWAP_CLUSTER_MAX / 8)) / SWAP_BATCH)
#define SWAP_SLOW_WRITES (SWAPFILE_CLUSTER / SWAP_BATCH)

/*
 * The fast/slow swap write ratio.

mm/swap_slots.c (new file, 345 lines)
@@ -0,0 +1,345 @@
|
||||
/*
|
||||
* Manage cache of swap slots to be used for and returned from
|
||||
* swap.
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* Author: Tim Chen <tim.c.chen@linux.intel.com>
|
||||
*
|
||||
* We allocate the swap slots from the global pool and put
|
||||
* it into local per cpu caches. This has the advantage
|
||||
* of no needing to acquire the swap_info lock every time
|
||||
* we need a new slot.
|
||||
*
|
||||
* There is also opportunity to simply return the slot
|
||||
* to local caches without needing to acquire swap_info
|
||||
* lock. We do not reuse the returned slots directly but
|
||||
* move them back to the global pool in a batch. This
|
||||
* allows the slots to coaellesce and reduce fragmentation.
|
||||
*
|
||||
* The swap entry allocated is marked with SWAP_HAS_CACHE
|
||||
* flag in map_count that prevents it from being allocated
|
||||
* again from the global pool.
|
||||
*
|
||||
* The swap slots cache is protected by a mutex instead of
|
||||
* a spin lock as when we search for slots with scan_swap_map,
|
||||
* we can possibly sleep.
|
||||
*/
|
||||
|
||||
#include <linux/swap_slots.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/mm.h>
|
||||
|
||||
#ifdef CONFIG_SWAP
|
||||
|
||||
static DEFINE_PER_CPU(struct swap_slots_cache, swp_slots);
|
||||
static bool swap_slot_cache_active;
|
||||
bool swap_slot_cache_enabled;
|
||||
static bool swap_slot_cache_initialized;
|
||||
DEFINE_MUTEX(swap_slots_cache_mutex);
|
||||
/* Serialize swap slots cache enable/disable operations */
|
||||
DEFINE_MUTEX(swap_slots_cache_enable_mutex);
|
||||
|
||||
static void __drain_swap_slots_cache(unsigned int type);
|
||||
static void deactivate_swap_slots_cache(void);
|
||||
static void reactivate_swap_slots_cache(void);
|
||||
|
||||
#define use_swap_slot_cache (swap_slot_cache_active && \
|
||||
swap_slot_cache_enabled && swap_slot_cache_initialized)
|
||||
#define SLOTS_CACHE 0x1
|
||||
#define SLOTS_CACHE_RET 0x2
|
||||
|
||||
static void deactivate_swap_slots_cache(void)
|
||||
{
|
||||
mutex_lock(&swap_slots_cache_mutex);
|
||||
swap_slot_cache_active = false;
|
||||
__drain_swap_slots_cache(SLOTS_CACHE|SLOTS_CACHE_RET);
|
||||
mutex_unlock(&swap_slots_cache_mutex);
|
||||
}
|
||||
|
||||
static void reactivate_swap_slots_cache(void)
|
||||
{
|
||||
mutex_lock(&swap_slots_cache_mutex);
|
||||
swap_slot_cache_active = true;
|
||||
mutex_unlock(&swap_slots_cache_mutex);
|
||||
}
|
||||
|
||||
/* Must not be called with cpu hot plug lock */
|
||||
void disable_swap_slots_cache_lock(void)
|
||||
{
|
||||
mutex_lock(&swap_slots_cache_enable_mutex);
|
||||
swap_slot_cache_enabled = false;
|
||||
if (swap_slot_cache_initialized) {
|
||||
/* serialize with cpu hotplug operations */
|
||||
get_online_cpus();
|
||||
__drain_swap_slots_cache(SLOTS_CACHE|SLOTS_CACHE_RET);
|
||||
put_online_cpus();
|
||||
}
|
||||
}
|
||||
|
||||
static void __reenable_swap_slots_cache(void)
|
||||
{
|
||||
swap_slot_cache_enabled = has_usable_swap();
|
||||
}
|
||||
|
||||
void reenable_swap_slots_cache_unlock(void)
|
||||
{
|
||||
__reenable_swap_slots_cache();
|
||||
mutex_unlock(&swap_slots_cache_enable_mutex);
|
||||
}
|
||||
|
||||
static bool check_cache_active(void)
|
||||
{
|
||||
long pages;
|
||||
|
||||
if (!swap_slot_cache_enabled || !swap_slot_cache_initialized)
|
||||
return false;
|
||||
|
||||
pages = get_nr_swap_pages();
|
||||
if (!swap_slot_cache_active) {
|
||||
if (pages > num_online_cpus() *
|
||||
THRESHOLD_ACTIVATE_SWAP_SLOTS_CACHE)
|
||||
reactivate_swap_slots_cache();
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* if global pool of slot caches too low, deactivate cache */
|
||||
if (pages < num_online_cpus() * THRESHOLD_DEACTIVATE_SWAP_SLOTS_CACHE)
|
||||
deactivate_swap_slots_cache();
|
||||
out:
|
||||
return swap_slot_cache_active;
|
||||
}
|
||||
|
||||
static int alloc_swap_slot_cache(unsigned int cpu)
|
||||
{
|
||||
struct swap_slots_cache *cache;
|
||||
swp_entry_t *slots, *slots_ret;
|
||||
|
||||
/*
|
||||
* Do allocation outside swap_slots_cache_mutex
|
||||
* as kvzalloc could trigger reclaim and get_swap_page,
|
||||
* which can lock swap_slots_cache_mutex.
|
||||
*/
|
||||
slots = kvzalloc(sizeof(swp_entry_t) * SWAP_SLOTS_CACHE_SIZE,
|
||||
GFP_KERNEL);
|
||||
if (!slots)
|
||||
return -ENOMEM;
|
||||
|
||||
slots_ret = kvzalloc(sizeof(swp_entry_t) * SWAP_SLOTS_CACHE_SIZE,
|
||||
GFP_KERNEL);
|
||||
if (!slots_ret) {
|
||||
kvfree(slots);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
mutex_lock(&swap_slots_cache_mutex);
|
||||
cache = &per_cpu(swp_slots, cpu);
|
||||
if (cache->slots || cache->slots_ret)
|
||||
/* cache already allocated */
|
||||
goto out;
|
||||
if (!cache->lock_initialized) {
|
||||
mutex_init(&cache->alloc_lock);
|
||||
spin_lock_init(&cache->free_lock);
|
||||
cache->lock_initialized = true;
|
||||
}
|
||||
cache->nr = 0;
|
||||
cache->cur = 0;
|
||||
cache->n_ret = 0;
|
||||
cache->slots = slots;
|
||||
slots = NULL;
|
||||
cache->slots_ret = slots_ret;
|
||||
slots_ret = NULL;
|
||||
out:
|
||||
mutex_unlock(&swap_slots_cache_mutex);
|
||||
if (slots)
|
||||
kvfree(slots);
|
||||
if (slots_ret)
|
||||
kvfree(slots_ret);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void drain_slots_cache_cpu(unsigned int cpu, unsigned int type,
|
||||
bool free_slots)
|
||||
{
|
||||
struct swap_slots_cache *cache;
|
||||
swp_entry_t *slots = NULL;
|
||||
|
||||
cache = &per_cpu(swp_slots, cpu);
|
||||
if ((type & SLOTS_CACHE) && cache->slots) {
|
||||
mutex_lock(&cache->alloc_lock);
|
||||
swapcache_free_entries(cache->slots + cache->cur, cache->nr);
|
||||
cache->cur = 0;
|
||||
cache->nr = 0;
|
||||
if (free_slots && cache->slots) {
|
||||
kvfree(cache->slots);
|
||||
cache->slots = NULL;
|
||||
}
|
||||
mutex_unlock(&cache->alloc_lock);
|
||||
}
|
||||
if ((type & SLOTS_CACHE_RET) && cache->slots_ret) {
|
||||
spin_lock_irq(&cache->free_lock);
|
||||
swapcache_free_entries(cache->slots_ret, cache->n_ret);
|
||||
cache->n_ret = 0;
|
||||
if (free_slots && cache->slots_ret) {
|
||||
slots = cache->slots_ret;
|
||||
cache->slots_ret = NULL;
|
||||
}
|
||||
spin_unlock_irq(&cache->free_lock);
|
||||
if (slots)
|
||||
kvfree(slots);
|
||||
}
|
||||
}
|
||||
|
||||
static void __drain_swap_slots_cache(unsigned int type)
|
||||
{
|
||||
unsigned int cpu;
|
||||
|
||||
/*
|
||||
* This function is called during
|
||||
* 1) swapoff, when we have to make sure no
|
||||
* left over slots are in cache when we remove
|
||||
* a swap device;
|
||||
* 2) disabling of swap slot cache, when we run low
|
||||
* on swap slots when allocating memory and need
|
||||
* to return swap slots to global pool.
|
||||
*
|
||||
* We cannot acquire cpu hot plug lock here as
|
||||
* this function can be invoked in the cpu
|
||||
* hot plug path:
|
||||
* cpu_up -> lock cpu_hotplug -> cpu hotplug state callback
|
||||
* -> memory allocation -> direct reclaim -> get_swap_page
|
||||
* -> drain_swap_slots_cache
|
||||
*
|
||||
* Hence the loop over current online cpu below could miss cpu that
|
||||
* is being brought online but not yet marked as online.
|
||||
* That is okay as we do not schedule and run anything on a
|
||||
* cpu before it has been marked online. Hence, we will not
|
||||
* fill any swap slots in slots cache of such cpu.
|
||||
* There are no slots on such cpu that need to be drained.
|
||||
*/
|
||||
for_each_online_cpu(cpu)
|
||||
drain_slots_cache_cpu(cpu, type, false);
|
||||
}
|
||||
|
||||
static int free_slot_cache(unsigned int cpu)
|
||||
{
|
||||
mutex_lock(&swap_slots_cache_mutex);
|
||||
drain_slots_cache_cpu(cpu, SLOTS_CACHE | SLOTS_CACHE_RET, true);
|
||||
mutex_unlock(&swap_slots_cache_mutex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int enable_swap_slots_cache(void)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
mutex_lock(&swap_slots_cache_enable_mutex);
|
||||
if (swap_slot_cache_initialized) {
|
||||
__reenable_swap_slots_cache();
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "swap_slots_cache",
|
||||
alloc_swap_slot_cache, free_slot_cache);
|
||||
if (ret < 0)
|
||||
goto out_unlock;
|
||||
swap_slot_cache_initialized = true;
|
||||
__reenable_swap_slots_cache();
|
||||
out_unlock:
|
||||
mutex_unlock(&swap_slots_cache_enable_mutex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* called with swap slot cache's alloc lock held */
|
||||
static int refill_swap_slots_cache(struct swap_slots_cache *cache)
|
||||
{
|
||||
if (!use_swap_slot_cache || cache->nr)
|
||||
return 0;
|
||||
|
||||
cache->cur = 0;
|
||||
if (swap_slot_cache_active)
|
||||
cache->nr = get_swap_pages(SWAP_SLOTS_CACHE_SIZE, cache->slots);
|
||||
|
||||
return cache->nr;
|
||||
}
|
||||
|
||||
int free_swap_slot(swp_entry_t entry)
|
||||
{
|
||||
struct swap_slots_cache *cache;
|
||||
|
||||
BUG_ON(!swap_slot_cache_initialized);
|
||||
|
||||
cache = &get_cpu_var(swp_slots);
|
||||
if (use_swap_slot_cache && cache->slots_ret) {
|
||||
spin_lock_irq(&cache->free_lock);
|
||||
/* Swap slots cache may be deactivated before acquiring lock */
|
||||
if (!use_swap_slot_cache) {
|
||||
spin_unlock_irq(&cache->free_lock);
|
||||
goto direct_free;
|
||||
}
|
||||
if (cache->n_ret >= SWAP_SLOTS_CACHE_SIZE) {
|
||||
/*
|
||||
* Return slots to global pool.
|
||||
* The current swap_map value is SWAP_HAS_CACHE.
|
||||
* Set it to 0 to indicate it is available for
|
||||
* allocation in global pool
|
||||
*/
|
||||
swapcache_free_entries(cache->slots_ret, cache->n_ret);
|
||||
cache->n_ret = 0;
|
||||
}
|
||||
cache->slots_ret[cache->n_ret++] = entry;
|
||||
spin_unlock_irq(&cache->free_lock);
|
||||
} else {
|
||||
direct_free:
|
||||
swapcache_free_entries(&entry, 1);
|
||||
}
|
||||
put_cpu_var(swp_slots);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
swp_entry_t get_swap_page(void)
|
||||
{
|
||||
swp_entry_t entry, *pentry;
|
||||
struct swap_slots_cache *cache;
|
||||
|
||||
/*
|
||||
* Preemption is allowed here, because we may sleep
|
||||
* in refill_swap_slots_cache(). But it is safe, because
|
||||
* accesses to the per-CPU data structure are protected by the
|
||||
* mutex cache->alloc_lock.
|
||||
*
|
||||
* The alloc path here does not touch cache->slots_ret
|
||||
* so cache->free_lock is not taken.
|
||||
*/
|
||||
cache = raw_cpu_ptr(&swp_slots);
|
||||
|
||||
entry.val = 0;
|
||||
if (check_cache_active()) {
|
||||
mutex_lock(&cache->alloc_lock);
|
||||
if (cache->slots) {
|
||||
repeat:
|
||||
if (cache->nr) {
|
||||
pentry = &cache->slots[cache->cur++];
|
||||
entry = *pentry;
|
||||
pentry->val = 0;
|
||||
cache->nr--;
|
||||
} else {
|
||||
if (refill_swap_slots_cache(cache))
|
||||
goto repeat;
|
||||
}
|
||||
}
|
||||
mutex_unlock(&cache->alloc_lock);
|
||||
if (entry.val)
|
||||
return entry;
|
||||
}
|
||||
|
||||
get_swap_pages(1, &entry);
|
||||
|
||||
return entry;
|
||||
}
|
||||
|
||||
#endif /* CONFIG_SWAP */
|
||||
@@ -17,6 +17,8 @@
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/pagevec.h>
|
||||
#include <linux/migrate.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/swap_slots.h>
|
||||
|
||||
#include <asm/pgtable.h>
|
||||
#include "internal.h"
|
||||
@@ -33,15 +35,8 @@ static const struct address_space_operations swap_aops = {
|
||||
#endif
|
||||
};
|
||||
|
||||
struct address_space swapper_spaces[MAX_SWAPFILES] = {
|
||||
[0 ... MAX_SWAPFILES - 1] = {
|
||||
.page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
|
||||
.i_mmap_writable = ATOMIC_INIT(0),
|
||||
.a_ops = &swap_aops,
|
||||
/* swap cache doesn't use writeback related tags */
|
||||
.flags = 1 << AS_NO_WRITEBACK_TAGS,
|
||||
}
|
||||
};
|
||||
struct address_space *swapper_spaces[MAX_SWAPFILES];
|
||||
static unsigned int nr_swapper_spaces[MAX_SWAPFILES];
|
||||
|
||||
#define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0)
|
||||
|
||||
@@ -54,11 +49,26 @@ static struct {
|
||||
|
||||
unsigned long total_swapcache_pages(void)
|
||||
{
|
||||
int i;
|
||||
unsigned int i, j, nr;
|
||||
unsigned long ret = 0;
|
||||
struct address_space *spaces;
|
||||
|
||||
for (i = 0; i < MAX_SWAPFILES; i++)
|
||||
ret += swapper_spaces[i].nrpages;
|
||||
rcu_read_lock();
|
||||
for (i = 0; i < MAX_SWAPFILES; i++) {
|
||||
/*
|
||||
* The corresponding entries in nr_swapper_spaces and
|
||||
* swapper_spaces will be reused only after at least
|
||||
* one grace period. So it is impossible for them
|
||||
* belongs to different usage.
|
||||
*/
|
||||
nr = nr_swapper_spaces[i];
|
||||
spaces = rcu_dereference(swapper_spaces[i]);
|
||||
if (!nr || !spaces)
|
||||
continue;
|
||||
for (j = 0; j < nr; j++)
|
||||
ret += spaces[j].nrpages;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -315,6 +325,17 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
|
||||
if (found_page)
|
||||
break;
|
||||
|
||||
/*
|
||||
* Just skip read ahead for unused swap slot.
|
||||
* During swap_off when swap_slot_cache is disabled,
|
||||
* we have to handle the race between putting
|
||||
* swap entry in swap cache and marking swap slot
|
||||
* as SWAP_HAS_CACHE. That's done in later part of code or
|
||||
* else swap_off will be aborted if we return NULL.
|
||||
*/
|
||||
if (!__swp_swapcount(entry) && swap_slot_cache_enabled)
|
||||
break;
|
||||
|
||||
/*
|
||||
* Get a new page to read into from swap.
|
||||
*/
|
||||
@@ -507,3 +528,38 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
|
||||
skip:
|
||||
return read_swap_cache_async(entry, gfp_mask, vma, addr);
|
||||
}
|
||||
|
||||
int init_swap_address_space(unsigned int type, unsigned long nr_pages)
|
||||
{
|
||||
struct address_space *spaces, *space;
|
||||
unsigned int i, nr;
|
||||
|
||||
nr = DIV_ROUND_UP(nr_pages, SWAP_ADDRESS_SPACE_PAGES);
|
||||
spaces = kvzalloc(sizeof(struct address_space) * nr, GFP_KERNEL);
|
||||
if (!spaces)
|
||||
return -ENOMEM;
|
||||
for (i = 0; i < nr; i++) {
|
||||
space = spaces + i;
|
||||
INIT_RADIX_TREE(&space->page_tree, GFP_ATOMIC|__GFP_NOWARN);
|
||||
atomic_set(&space->i_mmap_writable, 0);
|
||||
space->a_ops = &swap_aops;
|
||||
/* swap cache doesn't use writeback related tags */
|
||||
mapping_set_no_writeback_tags(space);
|
||||
spin_lock_init(&space->tree_lock);
|
||||
}
|
||||
nr_swapper_spaces[type] = nr;
|
||||
rcu_assign_pointer(swapper_spaces[type], spaces);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void exit_swap_address_space(unsigned int type)
|
||||
{
|
||||
struct address_space *spaces;
|
||||
|
||||
spaces = swapper_spaces[type];
|
||||
nr_swapper_spaces[type] = 0;
|
||||
rcu_assign_pointer(swapper_spaces[type], NULL);
|
||||
synchronize_rcu();
|
||||
kvfree(spaces);
|
||||
}
|
||||
|
||||
mm/swapfile.c (551 changed lines)
@@ -34,6 +34,7 @@
|
||||
#include <linux/frontswap.h>
|
||||
#include <linux/swapfile.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/swap_slots.h>
|
||||
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/tlbflush.h>
|
||||
@@ -276,6 +277,47 @@ static inline void cluster_set_null(struct swap_cluster_info *info)
|
||||
info->data = 0;
|
||||
}
|
||||
|
||||
static inline struct swap_cluster_info *lock_cluster(struct swap_info_struct *si,
|
||||
unsigned long offset)
|
||||
{
|
||||
struct swap_cluster_info *ci;
|
||||
|
||||
ci = si->cluster_info;
|
||||
if (ci) {
|
||||
ci += offset / SWAPFILE_CLUSTER;
|
||||
spin_lock(&ci->lock);
|
||||
}
|
||||
return ci;
|
||||
}
|
||||
|
||||
static inline void unlock_cluster(struct swap_cluster_info *ci)
|
||||
{
|
||||
if (ci)
|
||||
spin_unlock(&ci->lock);
|
||||
}
|
||||
|
||||
static inline struct swap_cluster_info *lock_cluster_or_swap_info(
|
||||
struct swap_info_struct *si,
|
||||
unsigned long offset)
|
||||
{
|
||||
struct swap_cluster_info *ci;
|
||||
|
||||
ci = lock_cluster(si, offset);
|
||||
if (!ci)
|
||||
spin_lock(&si->lock);
|
||||
|
||||
return ci;
|
||||
}
|
||||
|
||||
static inline void unlock_cluster_or_swap_info(struct swap_info_struct *si,
|
||||
struct swap_cluster_info *ci)
|
||||
{
|
||||
if (ci)
|
||||
unlock_cluster(ci);
|
||||
else
|
||||
spin_unlock(&si->lock);
|
||||
}
|
||||
|
||||
static inline bool cluster_list_empty(struct swap_cluster_list *list)
|
||||
{
|
||||
return cluster_is_null(&list->head);
|
||||
@@ -300,9 +342,17 @@ static void cluster_list_add_tail(struct swap_cluster_list *list,
|
||||
cluster_set_next_flag(&list->head, idx, 0);
|
||||
cluster_set_next_flag(&list->tail, idx, 0);
|
||||
} else {
|
||||
struct swap_cluster_info *ci_tail;
|
||||
unsigned int tail = cluster_next(&list->tail);
|
||||
|
||||
cluster_set_next(&ci[tail], idx);
|
||||
/*
|
||||
* Nested cluster lock, but both cluster locks are
|
||||
* only acquired when we held swap_info_struct->lock
|
||||
*/
|
||||
ci_tail = ci + tail;
|
||||
spin_lock_nested(&ci_tail->lock, SINGLE_DEPTH_NESTING);
|
||||
cluster_set_next(ci_tail, idx);
|
||||
unlock_cluster(ci_tail);
|
||||
cluster_set_next_flag(&list->tail, idx, 0);
|
||||
}
|
||||
}
|
||||
@@ -347,7 +397,7 @@ static void swap_cluster_schedule_discard(struct swap_info_struct *si,
|
||||
*/
|
||||
static void swap_do_scheduled_discard(struct swap_info_struct *si)
|
||||
{
|
||||
struct swap_cluster_info *info;
|
||||
struct swap_cluster_info *info, *ci;
|
||||
unsigned int idx;
|
||||
|
||||
info = si->cluster_info;
|
||||
@@ -360,10 +410,14 @@ static void swap_do_scheduled_discard(struct swap_info_struct *si)
|
||||
SWAPFILE_CLUSTER);
|
||||
|
||||
spin_lock(&si->lock);
|
||||
cluster_set_flag(&info[idx], CLUSTER_FLAG_FREE);
|
||||
ci = lock_cluster(si, idx * SWAPFILE_CLUSTER);
|
||||
cluster_set_flag(ci, CLUSTER_FLAG_FREE);
|
||||
unlock_cluster(ci);
|
||||
cluster_list_add_tail(&si->free_clusters, info, idx);
|
||||
ci = lock_cluster(si, idx * SWAPFILE_CLUSTER);
|
||||
memset(si->swap_map + idx * SWAPFILE_CLUSTER,
|
||||
0, SWAPFILE_CLUSTER);
|
||||
unlock_cluster(ci);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -462,12 +516,13 @@ scan_swap_map_ssd_cluster_conflict(struct swap_info_struct *si,
|
||||
* Try to get a swap entry from current cpu's swap entry pool (a cluster). This
|
||||
* might involve allocating a new cluster for current CPU too.
|
||||
*/
|
||||
static void scan_swap_map_try_ssd_cluster(struct swap_info_struct *si,
|
||||
static bool scan_swap_map_try_ssd_cluster(struct swap_info_struct *si,
|
||||
unsigned long *offset, unsigned long *scan_base)
|
||||
{
|
||||
struct percpu_cluster *cluster;
|
||||
struct swap_cluster_info *ci;
|
||||
bool found_free;
|
||||
unsigned long tmp;
|
||||
unsigned long tmp, max;
|
||||
|
||||
new_cluster:
|
||||
cluster = this_cpu_ptr(si->percpu_cluster);
|
||||
@@ -485,7 +540,7 @@ new_cluster:
|
||||
*scan_base = *offset = si->cluster_next;
|
||||
goto new_cluster;
|
||||
} else
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
found_free = false;
|
||||
@@ -495,14 +550,21 @@ new_cluster:
|
||||
* check if there is still free entry in the cluster
|
||||
*/
|
||||
tmp = cluster->next;
|
||||
while (tmp < si->max && tmp < (cluster_next(&cluster->index) + 1) *
|
||||
SWAPFILE_CLUSTER) {
|
||||
max = min_t(unsigned long, si->max,
|
||||
(cluster_next(&cluster->index) + 1) * SWAPFILE_CLUSTER);
|
||||
if (tmp >= max) {
|
||||
cluster_set_null(&cluster->index);
|
||||
goto new_cluster;
|
||||
}
|
||||
ci = lock_cluster(si, tmp);
|
||||
while (tmp < max) {
|
||||
if (!si->swap_map[tmp]) {
|
||||
found_free = true;
|
||||
break;
|
||||
}
|
||||
tmp++;
|
||||
}
|
||||
unlock_cluster(ci);
|
||||
if (!found_free) {
|
||||
cluster_set_null(&cluster->index);
|
||||
goto new_cluster;
|
||||
@@ -510,15 +572,22 @@ new_cluster:
|
||||
cluster->next = tmp + 1;
|
||||
*offset = tmp;
|
||||
*scan_base = tmp;
|
||||
return found_free;
|
||||
}
|
||||
|
||||
static unsigned long scan_swap_map(struct swap_info_struct *si,
|
||||
unsigned char usage)
|
||||
static int scan_swap_map_slots(struct swap_info_struct *si,
|
||||
unsigned char usage, int nr,
|
||||
swp_entry_t slots[])
|
||||
{
|
||||
struct swap_cluster_info *ci;
|
||||
unsigned long offset;
|
||||
unsigned long scan_base;
|
||||
unsigned long last_in_cluster = 0;
|
||||
int latency_ration = LATENCY_LIMIT;
|
||||
int n_ret = 0;
|
||||
|
||||
if (nr > SWAP_BATCH)
|
||||
nr = SWAP_BATCH;
|
||||
|
||||
/*
|
||||
* We try to cluster swap pages by allocating them sequentially
|
||||
@@ -536,8 +605,10 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
|
||||
|
||||
/* SSD algorithm */
|
||||
if (si->cluster_info) {
|
||||
scan_swap_map_try_ssd_cluster(si, &offset, &scan_base);
|
||||
goto checks;
|
||||
if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base))
|
||||
goto checks;
|
||||
else
|
||||
goto scan;
|
||||
}
|
||||
|
||||
if (unlikely(!si->cluster_nr--)) {
|
||||
@@ -581,8 +652,14 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
|
||||
|
||||
checks:
|
||||
if (si->cluster_info) {
|
||||
while (scan_swap_map_ssd_cluster_conflict(si, offset))
|
||||
scan_swap_map_try_ssd_cluster(si, &offset, &scan_base);
|
||||
while (scan_swap_map_ssd_cluster_conflict(si, offset)) {
|
||||
/* take a break if we already got some slots */
|
||||
if (n_ret)
|
||||
goto done;
|
||||
if (!scan_swap_map_try_ssd_cluster(si, &offset,
|
||||
&scan_base))
|
||||
goto scan;
|
||||
}
|
||||
}
|
||||
if (!(si->flags & SWP_WRITEOK))
|
||||
goto no_page;
|
||||
@@ -591,9 +668,11 @@ checks:
|
||||
if (offset > si->highest_bit)
|
||||
scan_base = offset = si->lowest_bit;
|
||||
|
||||
ci = lock_cluster(si, offset);
|
||||
/* reuse swap entry of cache-only swap if not busy. */
|
||||
if (vm_swap_full(si) && si->swap_map[offset] == SWAP_HAS_CACHE) {
|
||||
int swap_was_freed;
|
||||
unlock_cluster(ci);
|
||||
spin_unlock(&si->lock);
|
||||
swap_was_freed = __try_to_reclaim_swap(si, offset);
|
||||
spin_lock(&si->lock);
|
||||
@@ -603,8 +682,16 @@ checks:
|
||||
goto scan; /* check next one */
|
||||
}
|
||||
|
||||
if (si->swap_map[offset])
|
||||
goto scan;
|
||||
if (si->swap_map[offset]) {
|
||||
unlock_cluster(ci);
|
||||
if (!n_ret)
|
||||
goto scan;
|
||||
else
|
||||
goto done;
|
||||
}
|
||||
si->swap_map[offset] = usage;
|
||||
inc_cluster_info_page(si, si->cluster_info, offset);
|
||||
unlock_cluster(ci);
|
||||
|
||||
if (offset == si->lowest_bit)
|
||||
si->lowest_bit++;
|
||||
@@ -618,12 +705,44 @@ checks:
|
||||
plist_del(&si->avail_list, &swap_avail_head);
|
||||
spin_unlock(&swap_avail_lock);
|
||||
}
|
||||
si->swap_map[offset] = usage;
|
||||
inc_cluster_info_page(si, si->cluster_info, offset);
|
||||
si->cluster_next = offset + 1;
|
||||
si->flags -= SWP_SCANNING;
|
||||
slots[n_ret++] = swp_entry(si->type, offset);
|
||||
|
||||
return offset;
|
||||
/* got enough slots or reach max slots? */
|
||||
if ((n_ret == nr) || (offset >= si->highest_bit))
|
||||
goto done;
|
||||
|
||||
/* search for next available slot */
|
||||
|
||||
/* time to take a break? */
|
||||
if (unlikely(--latency_ration < 0)) {
|
||||
if (n_ret)
|
||||
goto done;
|
||||
spin_unlock(&si->lock);
|
||||
cond_resched();
|
||||
spin_lock(&si->lock);
|
||||
latency_ration = LATENCY_LIMIT;
|
||||
}
|
||||
|
||||
/* try to get more slots in cluster */
|
||||
if (si->cluster_info) {
|
||||
if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base))
|
||||
goto checks;
|
||||
else
|
||||
goto done;
|
||||
}
|
||||
/* non-ssd case */
|
||||
++offset;
|
||||
|
||||
/* non-ssd case, still more slots in cluster? */
|
||||
if (si->cluster_nr && !si->swap_map[offset]) {
|
||||
--si->cluster_nr;
|
||||
goto checks;
|
||||
}
|
||||
|
||||
done:
|
||||
si->flags -= SWP_SCANNING;
|
||||
return n_ret;
|
||||
|
||||
scan:
|
||||
spin_unlock(&si->lock);
|
||||
@@ -663,18 +782,42 @@ scan:
|
||||
|
||||
no_page:
|
||||
si->flags -= SWP_SCANNING;
|
||||
return 0;
|
||||
return n_ret;
|
||||
}

swp_entry_t get_swap_page(void)
static unsigned long scan_swap_map(struct swap_info_struct *si,
unsigned char usage)
{
swp_entry_t entry;
int n_ret;

n_ret = scan_swap_map_slots(si, usage, 1, &entry);

if (n_ret)
return swp_offset(entry);
else
return 0;

}

int get_swap_pages(int n_goal, swp_entry_t swp_entries[])
{
struct swap_info_struct *si, *next;
pgoff_t offset;
long avail_pgs;
int n_ret = 0;
int swap_ratio_off = 0;

if (atomic_long_read(&nr_swap_pages) <= 0)
avail_pgs = atomic_long_read(&nr_swap_pages);
if (avail_pgs <= 0)
goto noswap;
atomic_long_dec(&nr_swap_pages);

if (n_goal > SWAP_BATCH)
n_goal = SWAP_BATCH;

if (n_goal > avail_pgs)
n_goal = avail_pgs;

atomic_long_sub(n_goal, &nr_swap_pages);

lock_and_start:
spin_lock(&swap_avail_lock);
@@ -720,14 +863,14 @@ start:
spin_unlock(&si->lock);
goto nextsi;
}

/* This is called for allocating swap entry for cache */
offset = scan_swap_map(si, SWAP_HAS_CACHE);
n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE,
n_goal, swp_entries);
spin_unlock(&si->lock);
if (offset)
return swp_entry(si->type, offset);
if (n_ret)
goto check_out;
pr_debug("scan_swap_map of si %d failed to find offset\n",
si->type);
si->type);

spin_lock(&swap_avail_lock);
nextsi:
/*
@@ -738,7 +881,8 @@ nextsi:
* up between us dropping swap_avail_lock and taking si->lock.
* Since we dropped the swap_avail_lock, the swap_avail_head
* list may have been modified; so if next is still in the
* swap_avail_head list then try it, otherwise start over.
* swap_avail_head list then try it, otherwise start over
* if we have not gotten any slots.
*/
if (plist_node_empty(&next->avail_list))
goto start_over;
@@ -746,9 +890,11 @@ nextsi:

spin_unlock(&swap_avail_lock);

atomic_long_inc(&nr_swap_pages);
check_out:
if (n_ret < n_goal)
atomic_long_add((long) (n_goal-n_ret), &nr_swap_pages);
noswap:
return (swp_entry_t) {0};
return n_ret;
}
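
The point of get_swap_pages() is that one call can now hand out up to SWAP_BATCH entries, so a caller such as the per-CPU swap slots cache pays for the swap_avail_lock/si->lock round trip once per batch rather than once per page. Below is a minimal user-space model of that interface, illustration only and not part of the patch; toy_get_swap_pages(), NR_SLOTS and the sizes are invented for the example.

/* Toy model of batched slot allocation in the style of get_swap_pages(). */
#include <stdio.h>

#define SWAP_BATCH 64
#define NR_SLOTS   1024

typedef struct { unsigned long val; } swp_entry_t;

static unsigned char swap_map[NR_SLOTS];	/* 0 == free */

/* grab up to n_goal free slots in one pass, capped at SWAP_BATCH */
static int toy_get_swap_pages(int n_goal, swp_entry_t entries[])
{
	int n_ret = 0;

	if (n_goal > SWAP_BATCH)
		n_goal = SWAP_BATCH;

	for (unsigned long off = 0; off < NR_SLOTS && n_ret < n_goal; off++) {
		if (!swap_map[off]) {
			swap_map[off] = 1;
			entries[n_ret++].val = off;
		}
	}
	return n_ret;	/* may be fewer than n_goal when space is tight */
}

int main(void)
{
	swp_entry_t cache[SWAP_BATCH];
	int n = toy_get_swap_pages(SWAP_BATCH, cache);

	if (n)
		printf("refilled cache with %d entries (first=%lu, last=%lu)\n",
		       n, cache[0].val, cache[n - 1].val);
	return 0;
}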

/* The only caller of this function is now suspend routine */
@@ -773,7 +919,7 @@ swp_entry_t get_swap_page_of_type(int type)
return (swp_entry_t) {0};
}

static struct swap_info_struct *swap_info_get(swp_entry_t entry)
static struct swap_info_struct *__swap_info_get(swp_entry_t entry)
{
struct swap_info_struct *p;
unsigned long offset, type;
@@ -789,34 +935,76 @@ static struct swap_info_struct *swap_info_get(swp_entry_t entry)
offset = swp_offset(entry);
if (offset >= p->max)
goto bad_offset;
if (!p->swap_map[offset])
goto bad_free;
spin_lock(&p->lock);
return p;

bad_free:
pr_err("swap_free: %s%08lx\n", Unused_offset, entry.val);
goto out;
bad_offset:
pr_err("swap_free: %s%08lx\n", Bad_offset, entry.val);
pr_err("swap_info_get: %s%08lx\n", Bad_offset, entry.val);
goto out;
bad_device:
pr_err("swap_free: %s%08lx\n", Unused_file, entry.val);
pr_err("swap_info_get: %s%08lx\n", Unused_file, entry.val);
goto out;
bad_nofile:
pr_err("swap_free: %s%08lx\n", Bad_file, entry.val);
pr_err("swap_info_get: %s%08lx\n", Bad_file, entry.val);
out:
return NULL;
}

static unsigned char swap_entry_free(struct swap_info_struct *p,
swp_entry_t entry, unsigned char usage)
static struct swap_info_struct *_swap_info_get(swp_entry_t entry)
{
struct swap_info_struct *p;

p = __swap_info_get(entry);
if (!p)
goto out;
if (!p->swap_map[swp_offset(entry)])
goto bad_free;
return p;

bad_free:
pr_err("swap_info_get: %s%08lx\n", Unused_offset, entry.val);
goto out;
out:
return NULL;
}

static struct swap_info_struct *swap_info_get(swp_entry_t entry)
{
struct swap_info_struct *p;

p = _swap_info_get(entry);
if (p)
spin_lock(&p->lock);
return p;
}

static struct swap_info_struct *swap_info_get_cont(swp_entry_t entry,
struct swap_info_struct *q)
{
struct swap_info_struct *p;

p = _swap_info_get(entry);

if (p != q) {
if (q != NULL)
spin_unlock(&q->lock);
if (p != NULL)
spin_lock(&p->lock);
}
return p;
}

static unsigned char __swap_entry_free(struct swap_info_struct *p,
swp_entry_t entry, unsigned char usage)
{
struct swap_cluster_info *ci;
unsigned long offset = swp_offset(entry);
unsigned char count;
unsigned char has_cache;

ci = lock_cluster_or_swap_info(p, offset);

count = p->swap_map[offset];

has_cache = count & SWAP_HAS_CACHE;
count &= ~SWAP_HAS_CACHE;

@@ -840,40 +1028,54 @@ static unsigned char swap_entry_free(struct swap_info_struct *p,
}

usage = count | has_cache;
p->swap_map[offset] = usage;
p->swap_map[offset] = usage ? : SWAP_HAS_CACHE;

/* free if no reference */
if (!usage) {
mem_cgroup_uncharge_swap(entry);
dec_cluster_info_page(p, p->cluster_info, offset);
if (offset < p->lowest_bit)
p->lowest_bit = offset;
if (offset > p->highest_bit) {
bool was_full = !p->highest_bit;
p->highest_bit = offset;
if (was_full && (p->flags & SWP_WRITEOK)) {
spin_lock(&swap_avail_lock);
WARN_ON(!plist_node_empty(&p->avail_list));
if (plist_node_empty(&p->avail_list))
plist_add(&p->avail_list,
&swap_avail_head);
spin_unlock(&swap_avail_lock);
}
}
atomic_long_inc(&nr_swap_pages);
p->inuse_pages--;
frontswap_invalidate_page(p->type, offset);
if (p->flags & SWP_BLKDEV) {
struct gendisk *disk = p->bdev->bd_disk;
if (disk->fops->swap_slot_free_notify)
disk->fops->swap_slot_free_notify(p->bdev,
offset);
}
}
unlock_cluster_or_swap_info(p, ci);

return usage;
}

static void swap_entry_free(struct swap_info_struct *p, swp_entry_t entry)
{
struct swap_cluster_info *ci;
unsigned long offset = swp_offset(entry);
unsigned char count;

ci = lock_cluster(p, offset);
count = p->swap_map[offset];
VM_BUG_ON(count != SWAP_HAS_CACHE);
p->swap_map[offset] = 0;
dec_cluster_info_page(p, p->cluster_info, offset);
unlock_cluster(ci);

mem_cgroup_uncharge_swap(entry);
if (offset < p->lowest_bit)
p->lowest_bit = offset;
if (offset > p->highest_bit) {
bool was_full = !p->highest_bit;

p->highest_bit = offset;
if (was_full && (p->flags & SWP_WRITEOK)) {
spin_lock(&swap_avail_lock);
WARN_ON(!plist_node_empty(&p->avail_list));
if (plist_node_empty(&p->avail_list))
plist_add(&p->avail_list,
&swap_avail_head);
spin_unlock(&swap_avail_lock);
}
}
atomic_long_inc(&nr_swap_pages);
p->inuse_pages--;
frontswap_invalidate_page(p->type, offset);
if (p->flags & SWP_BLKDEV) {
struct gendisk *disk = p->bdev->bd_disk;

if (disk->fops->swap_slot_free_notify)
disk->fops->swap_slot_free_notify(p->bdev,
offset);
}
}

/*
* Caller has made sure that the swap device corresponding to entry
* is still around or has not been recycled.
@@ -882,10 +1084,10 @@ void swap_free(swp_entry_t entry)
{
struct swap_info_struct *p;

p = swap_info_get(entry);
p = _swap_info_get(entry);
if (p) {
swap_entry_free(p, entry, 1);
spin_unlock(&p->lock);
if (!__swap_entry_free(p, entry, 1))
free_swap_slot(entry);
}
}

@@ -896,13 +1098,33 @@ void swapcache_free(swp_entry_t entry)
{
struct swap_info_struct *p;

p = swap_info_get(entry);
p = _swap_info_get(entry);
if (p) {
swap_entry_free(p, entry, SWAP_HAS_CACHE);
spin_unlock(&p->lock);
if (!__swap_entry_free(p, entry, SWAP_HAS_CACHE))
free_swap_slot(entry);
}
}

void swapcache_free_entries(swp_entry_t *entries, int n)
{
struct swap_info_struct *p, *prev;
int i;

if (n <= 0)
return;

prev = NULL;
p = NULL;
for (i = 0; i < n; ++i) {
p = swap_info_get_cont(entries[i], prev);
if (p)
swap_entry_free(p, entries[i]);
prev = p;
}
if (p)
spin_unlock(&p->lock);
}
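
swap_info_get_cont() exists so that swapcache_free_entries() can walk a whole batch of entries while only dropping and re-taking a device lock when two consecutive entries belong to different swap devices. The user-space model below shows only that hand-off; it is illustration, not part of the patch, and struct toy_dev and get_cont() are made-up names.

/* Toy model of the lock hand-off used by swap_info_get_cont(). */
#include <pthread.h>
#include <stdio.h>

struct toy_dev {
	int id;
	pthread_mutex_t lock;
};

static struct toy_dev devs[2] = {
	{ 0, PTHREAD_MUTEX_INITIALIZER },
	{ 1, PTHREAD_MUTEX_INITIALIZER },
};

/* only touch the locks when the device actually changes */
static struct toy_dev *get_cont(struct toy_dev *p, struct toy_dev *q)
{
	if (p != q) {
		if (q)
			pthread_mutex_unlock(&q->lock);
		if (p)
			pthread_mutex_lock(&p->lock);
	}
	return p;
}

int main(void)
{
	/* entries 0 and 1 live on dev 0, entry 2 on dev 1: two lock acquisitions total */
	struct toy_dev *order[] = { &devs[0], &devs[0], &devs[1] };
	struct toy_dev *prev = NULL;

	for (int i = 0; i < 3; i++) {
		prev = get_cont(order[i], prev);
		printf("freeing entry %d on dev %d\n", i, prev->id);
	}
	if (prev)
		pthread_mutex_unlock(&prev->lock);
	return 0;
}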

/*
* How many references to page are currently swapped out?
* This does not give an exact answer when swap count is continued,
@@ -912,17 +1134,49 @@ int page_swapcount(struct page *page)
{
int count = 0;
struct swap_info_struct *p;
struct swap_cluster_info *ci;
swp_entry_t entry;
unsigned long offset;

entry.val = page_private(page);
p = swap_info_get(entry);
p = _swap_info_get(entry);
if (p) {
count = swap_count(p->swap_map[swp_offset(entry)]);
spin_unlock(&p->lock);
offset = swp_offset(entry);
ci = lock_cluster_or_swap_info(p, offset);
count = swap_count(p->swap_map[offset]);
unlock_cluster_or_swap_info(p, ci);
}
return count;
}

static int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry)
{
int count = 0;
pgoff_t offset = swp_offset(entry);
struct swap_cluster_info *ci;

ci = lock_cluster_or_swap_info(si, offset);
count = swap_count(si->swap_map[offset]);
unlock_cluster_or_swap_info(si, ci);
return count;
}

/*
* How many references to @entry are currently swapped out?
* This does not give an exact answer when swap count is continued,
* but does include the high COUNT_CONTINUED flag to allow for that.
*/
int __swp_swapcount(swp_entry_t entry)
{
int count = 0;
struct swap_info_struct *si;

si = __swap_info_get(entry);
if (si)
count = swap_swapcount(si, entry);
return count;
}

/*
* How many references to @entry are currently swapped out?
* This considers COUNT_CONTINUED so it returns exact answer.
@@ -931,22 +1185,26 @@ int swp_swapcount(swp_entry_t entry)
{
int count, tmp_count, n;
struct swap_info_struct *p;
struct swap_cluster_info *ci;
struct page *page;
pgoff_t offset;
unsigned char *map;

p = swap_info_get(entry);
p = _swap_info_get(entry);
if (!p)
return 0;

count = swap_count(p->swap_map[swp_offset(entry)]);
offset = swp_offset(entry);

ci = lock_cluster_or_swap_info(p, offset);

count = swap_count(p->swap_map[offset]);
if (!(count & COUNT_CONTINUED))
goto out;

count &= ~COUNT_CONTINUED;
n = SWAP_MAP_MAX + 1;

offset = swp_offset(entry);
page = vmalloc_to_page(p->swap_map + offset);
offset &= ~PAGE_MASK;
VM_BUG_ON(page_private(page) != SWP_CONTINUED);
@@ -961,7 +1219,7 @@ int swp_swapcount(swp_entry_t entry)
n *= (SWAP_CONT_MAX + 1);
} while (tmp_count & COUNT_CONTINUED);
out:
spin_unlock(&p->lock);
unlock_cluster_or_swap_info(p, ci);
return count;
}

@@ -1053,21 +1311,23 @@ int free_swap_and_cache(swp_entry_t entry)
{
struct swap_info_struct *p;
struct page *page = NULL;
unsigned char count;

if (non_swap_entry(entry))
return 1;

p = swap_info_get(entry);
p = _swap_info_get(entry);
if (p) {
if (swap_entry_free(p, entry, 1) == SWAP_HAS_CACHE) {
count = __swap_entry_free(p, entry, 1);
if (count == SWAP_HAS_CACHE) {
page = find_get_page(swap_address_space(entry),
swp_offset(entry));
if (page && !trylock_page(page)) {
put_page(page);
page = NULL;
}
}
spin_unlock(&p->lock);
} else if (!count)
free_swap_slot(entry);
}
if (page) {
/*
@@ -1075,7 +1335,8 @@ int free_swap_and_cache(swp_entry_t entry)
* Also recheck PageSwapCache now page is locked (above).
*/
if (PageSwapCache(page) && !PageWriteback(page) &&
(!page_mapped(page) || mem_cgroup_swap_full(page))) {
(!page_mapped(page) || mem_cgroup_swap_full(page)) &&
!swap_swapcount(p, entry)) {
delete_from_swap_cache(page);
SetPageDirty(page);
}
@@ -1290,6 +1551,7 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,

pmd = pmd_offset(pud, addr);
do {
cond_resched();
next = pmd_addr_end(addr, end);
if (pmd_none_or_trans_huge_or_clear_bad(pmd))
continue;
@@ -1369,6 +1631,7 @@ static int unuse_mm(struct mm_struct *mm,
for (vma = mm->mmap; vma; vma = vma->vm_next) {
if (vma->anon_vma && (ret = unuse_vma(vma, entry, page)))
break;
cond_resched();
}
up_read(&mm->mmap_sem);
return (ret < 0)? ret: 0;
@@ -1406,15 +1669,12 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
prev = 0;
i = 1;
}
if (frontswap) {
if (frontswap_test(si, i))
break;
else
continue;
}
count = READ_ONCE(si->swap_map[i]);
if (count && swap_count(count) != SWAP_MAP_BAD)
break;
if (!frontswap || frontswap_test(si, i))
break;
if ((i % LATENCY_LIMIT) == 0)
cond_resched();
}
return i;
}
@@ -1896,6 +2156,17 @@ static void reinsert_swap_info(struct swap_info_struct *p)
spin_unlock(&swap_lock);
}

bool has_usable_swap(void)
{
bool ret = true;

spin_lock(&swap_lock);
if (plist_head_empty(&swap_active_head))
ret = false;
spin_unlock(&swap_lock);
return ret;
}

SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
{
struct swap_info_struct *p = NULL;
@@ -1966,6 +2237,8 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
spin_unlock(&p->lock);
spin_unlock(&swap_lock);

disable_swap_slots_cache_lock();

set_current_oom_origin();
err = try_to_unuse(p->type, false, 0); /* force unuse all pages */
clear_current_oom_origin();
@@ -1973,9 +2246,12 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
if (err) {
/* re-insert swap space back into swap_list */
reinsert_swap_info(p);
reenable_swap_slots_cache_unlock();
goto out_dput;
}

reenable_swap_slots_cache_unlock();

flush_work(&p->discard_work);

destroy_swap_extents(p);
@@ -2014,10 +2290,11 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
free_percpu(p->percpu_cluster);
p->percpu_cluster = NULL;
vfree(swap_map);
vfree(cluster_info);
vfree(frontswap_map);
kvfree(cluster_info);
kvfree(frontswap_map);
/* Destroy swap account information */
swap_cgroup_swapoff(p->type);
exit_swap_address_space(p->type);

inode = mapping->host;
if (S_ISBLK(inode->i_mode)) {
@@ -2359,6 +2636,13 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
return maxpages;
}

#define SWAP_CLUSTER_INFO_COLS \
DIV_ROUND_UP(L1_CACHE_BYTES, sizeof(struct swap_cluster_info))
#define SWAP_CLUSTER_SPACE_COLS \
DIV_ROUND_UP(SWAP_ADDRESS_SPACE_PAGES, SWAPFILE_CLUSTER)
#define SWAP_CLUSTER_COLS \
max_t(unsigned int, SWAP_CLUSTER_INFO_COLS, SWAP_CLUSTER_SPACE_COLS)

static int setup_swap_map_and_extents(struct swap_info_struct *p,
union swap_header *swap_header,
unsigned char *swap_map,
@@ -2366,11 +2650,12 @@ static int setup_swap_map_and_extents(struct swap_info_struct *p,
unsigned long maxpages,
sector_t *span)
{
int i;
unsigned int j, k;
unsigned int nr_good_pages;
int nr_extents;
unsigned long nr_clusters = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER);
unsigned long idx = p->cluster_next / SWAPFILE_CLUSTER;
unsigned long col = p->cluster_next / SWAPFILE_CLUSTER % SWAP_CLUSTER_COLS;
unsigned long i, idx;

nr_good_pages = maxpages - 1; /* omit header page */

@@ -2418,15 +2703,23 @@ static int setup_swap_map_and_extents(struct swap_info_struct *p,
if (!cluster_info)
return nr_extents;

for (i = 0; i < nr_clusters; i++) {
if (!cluster_count(&cluster_info[idx])) {

/*
* Reduce false cache line sharing between cluster_info and
* sharing same address space.
*/
for (k = 0; k < SWAP_CLUSTER_COLS; k++) {
j = (k + col) % SWAP_CLUSTER_COLS;
for (i = 0; i < DIV_ROUND_UP(nr_clusters, SWAP_CLUSTER_COLS); i++) {
idx = i * SWAP_CLUSTER_COLS + j;
if (idx >= nr_clusters)
continue;
if (cluster_count(&cluster_info[idx]))
continue;
cluster_set_flag(&cluster_info[idx], CLUSTER_FLAG_FREE);
cluster_list_add_tail(&p->free_clusters, cluster_info,
idx);
}
idx++;
if (idx == nr_clusters)
idx = 0;
}
return nr_extents;
}
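
The rewritten initialisation loop above links free clusters column by column (idx = i * SWAP_CLUSTER_COLS + j), starting from the column that holds cluster_next, so that consecutive entries on the free list do not sit in the same cache line of the cluster_info array. The standalone sketch below only reproduces that index arithmetic; it is illustration, not part of the patch, and the sizes are made up so the visiting order is easy to read.

/* Toy reproduction of the column-interleaved cluster walk. */
#include <stdio.h>

#define SWAP_CLUSTER_COLS 4
#define NR_CLUSTERS       10

int main(void)
{
	unsigned long col = 1;	/* pretend cluster_next falls in column 1 */

	for (unsigned long k = 0; k < SWAP_CLUSTER_COLS; k++) {
		unsigned long j = (k + col) % SWAP_CLUSTER_COLS;

		for (unsigned long i = 0;
		     i < (NR_CLUSTERS + SWAP_CLUSTER_COLS - 1) / SWAP_CLUSTER_COLS;
		     i++) {
			unsigned long idx = i * SWAP_CLUSTER_COLS + j;

			if (idx >= NR_CLUSTERS)
				continue;
			printf("add cluster %lu to free list\n", idx);
		}
	}
	return 0;
}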
@@ -2529,6 +2822,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)

if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) {
int cpu;
unsigned long ci, nr_cluster;

p->flags |= SWP_SOLIDSTATE;
/*
@@ -2536,13 +2830,18 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
* SSD
*/
p->cluster_next = 1 + (prandom_u32() % p->highest_bit);
nr_cluster = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER);

cluster_info = vzalloc(DIV_ROUND_UP(maxpages,
SWAPFILE_CLUSTER) * sizeof(*cluster_info));
cluster_info = kvzalloc(nr_cluster * sizeof(*cluster_info),
GFP_KERNEL);
if (!cluster_info) {
error = -ENOMEM;
goto bad_swap;
}

for (ci = 0; ci < nr_cluster; ci++)
spin_lock_init(&((cluster_info + ci)->lock));

p->percpu_cluster = alloc_percpu(struct percpu_cluster);
if (!p->percpu_cluster) {
error = -ENOMEM;
@@ -2567,7 +2866,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
}
/* frontswap enabled? set up bit-per-page map for frontswap */
if (IS_ENABLED(CONFIG_FRONTSWAP))
frontswap_map = vzalloc(BITS_TO_LONGS(maxpages) * sizeof(long));
frontswap_map = kvzalloc(BITS_TO_LONGS(maxpages) * sizeof(long),
GFP_KERNEL);

if (p->bdev &&(swap_flags & SWAP_FLAG_DISCARD) && swap_discardable(p)) {
/*
@@ -2602,6 +2902,10 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
if (p->bdev && blk_queue_fast(bdev_get_queue(p->bdev)))
p->flags |= SWP_FAST;

error = init_swap_address_space(p->type, maxpages);
if (error)
goto bad_swap;

mutex_lock(&swapon_mutex);
prio = -1;
if (swap_flags & SWAP_FLAG_PREFER) {
@@ -2642,7 +2946,8 @@ bad_swap:
p->flags = 0;
spin_unlock(&swap_lock);
vfree(swap_map);
vfree(cluster_info);
kvfree(cluster_info);
kvfree(frontswap_map);
if (swap_file) {
if (inode && S_ISREG(inode->i_mode)) {
inode_unlock(inode);
@@ -2659,6 +2964,8 @@ out:
putname(name);
if (inode && S_ISREG(inode->i_mode))
inode_unlock(inode);
if (!error)
enable_swap_slots_cache();
return error;
}

@@ -2693,6 +3000,7 @@ void si_swapinfo(struct sysinfo *val)
static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
{
struct swap_info_struct *p;
struct swap_cluster_info *ci;
unsigned long offset, type;
unsigned char count;
unsigned char has_cache;
@@ -2706,10 +3014,10 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
goto bad_file;
p = swap_info[type];
offset = swp_offset(entry);

spin_lock(&p->lock);
if (unlikely(offset >= p->max))
goto unlock_out;
goto out;

ci = lock_cluster_or_swap_info(p, offset);

count = p->swap_map[offset];

@@ -2752,7 +3060,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
p->swap_map[offset] = count | has_cache;

unlock_out:
spin_unlock(&p->lock);
unlock_cluster_or_swap_info(p, ci);
out:
return err;

@@ -2841,6 +3149,7 @@ EXPORT_SYMBOL_GPL(__page_file_index);
int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
{
struct swap_info_struct *si;
struct swap_cluster_info *ci;
struct page *head;
struct page *page;
struct page *list_page;
@@ -2864,6 +3173,9 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
}

offset = swp_offset(entry);

ci = lock_cluster(si, offset);

count = si->swap_map[offset] & ~SWAP_HAS_CACHE;

if ((count & ~COUNT_CONTINUED) != SWAP_MAP_MAX) {
@@ -2876,6 +3188,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
}

if (!page) {
unlock_cluster(ci);
spin_unlock(&si->lock);
return -ENOMEM;
}
@@ -2924,6 +3237,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
list_add_tail(&page->lru, &head->lru);
page = NULL; /* now it's attached, don't free it */
out:
unlock_cluster(ci);
spin_unlock(&si->lock);
outer:
if (page)
@@ -2937,7 +3251,8 @@ outer:
* into, carry if so, or else fail until a new continuation page is allocated;
* when the original swap_map count is decremented from 0 with continuation,
* borrow from the continuation and report whether it still holds more.
* Called while __swap_duplicate() or swap_entry_free() holds swap_lock.
* Called while __swap_duplicate() or swap_entry_free() holds swap or cluster
* lock.
*/
static bool swap_count_continued(struct swap_info_struct *si,
pgoff_t offset, unsigned char count)

@@ -1591,6 +1591,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
}
}

prefetch(&sk->sk_rmem_alloc);
if (rcu_access_pointer(sk->sk_filter) &&
udp_lib_checksum_complete(skb))
goto csum_error;

@@ -612,6 +612,7 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
}
}

prefetch(&sk->sk_rmem_alloc);
if (rcu_access_pointer(sk->sk_filter) &&
udp_lib_checksum_complete(skb))
goto csum_error;
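
The two UDP hunks add a single prefetch() hint for sk->sk_rmem_alloc so the cache line used by the receive-memory accounting is already on its way while the checksum is still being verified. In user space the same idea reduces to the compiler builtin shown below; this is illustration only, not part of the patch, and struct toy_sock is invented for the example.

/* Toy of the prefetch-before-use pattern, using the GCC/Clang builtin. */
#include <stdio.h>

struct toy_sock {
	long rmem_alloc;
	char payload[4096];
};

static int checksum(const struct toy_sock *sk)
{
	int sum = 0;

	for (unsigned int i = 0; i < sizeof(sk->payload); i++)
		sum += sk->payload[i];
	return sum;
}

int main(void)
{
	static struct toy_sock sk;

	/* start pulling in the accounting field before it is needed */
	__builtin_prefetch(&sk.rmem_alloc, 1 /* write */, 3 /* high locality */);

	int sum = checksum(&sk);
	sk.rmem_alloc += sizeof(sk.payload);	/* the access the hint targets */
	printf("sum=%d rmem=%ld\n", sum, sk.rmem_alloc);
	return 0;
}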