Merge commit 'v2.6.30-rc1' into core/urgent
Merge reason: need latest upstream to queue up dependent fix Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
commit
ff96e612cb
3134 changed files with 505113 additions and 125827 deletions
10
CREDITS
10
CREDITS
|
@ -1412,8 +1412,8 @@ P: 1024D/77D4FC9B F5C5 1C20 1DFC DEC3 3107 54A4 2332 ADFC 77D4 FC9B
|
||||||
D: National Language Support
|
D: National Language Support
|
||||||
D: Linux Internationalization Project
|
D: Linux Internationalization Project
|
||||||
D: German Localization for Linux and GNU software
|
D: German Localization for Linux and GNU software
|
||||||
S: Kriemhildring 12a
|
S: Auf der Fittel 18
|
||||||
S: 65795 Hattersheim am Main
|
S: 53347 Alfter
|
||||||
S: Germany
|
S: Germany
|
||||||
|
|
||||||
N: Christoph Hellwig
|
N: Christoph Hellwig
|
||||||
|
@ -3580,6 +3580,12 @@ N: Dirk Verworner
|
||||||
D: Co-author of German book ``Linux-Kernel-Programmierung''
|
D: Co-author of German book ``Linux-Kernel-Programmierung''
|
||||||
D: Co-founder of Berlin Linux User Group
|
D: Co-founder of Berlin Linux User Group
|
||||||
|
|
||||||
|
N: Riku Voipio
|
||||||
|
E: riku.voipio@iki.fi
|
||||||
|
D: Author of PCA9532 LED and Fintek f75375s hwmon driver
|
||||||
|
D: Some random ARM board patches
|
||||||
|
S: Finland
|
||||||
|
|
||||||
N: Patrick Volkerding
|
N: Patrick Volkerding
|
||||||
E: volkerdi@ftp.cdrom.com
|
E: volkerdi@ftp.cdrom.com
|
||||||
D: Produced the Slackware distribution, updated the SVGAlib
|
D: Produced the Slackware distribution, updated the SVGAlib
|
||||||
|
|
|
@ -86,6 +86,8 @@ cachetlb.txt
|
||||||
- describes the cache/TLB flushing interfaces Linux uses.
|
- describes the cache/TLB flushing interfaces Linux uses.
|
||||||
cdrom/
|
cdrom/
|
||||||
- directory with information on the CD-ROM drivers that Linux has.
|
- directory with information on the CD-ROM drivers that Linux has.
|
||||||
|
cgroups/
|
||||||
|
- cgroups features, including cpusets and memory controller.
|
||||||
connector/
|
connector/
|
||||||
- docs on the netlink based userspace<->kernel space communication mod.
|
- docs on the netlink based userspace<->kernel space communication mod.
|
||||||
console/
|
console/
|
||||||
|
@ -98,8 +100,6 @@ cpu-load.txt
|
||||||
- document describing how CPU load statistics are collected.
|
- document describing how CPU load statistics are collected.
|
||||||
cpuidle/
|
cpuidle/
|
||||||
- info on CPU_IDLE, CPU idle state management subsystem.
|
- info on CPU_IDLE, CPU idle state management subsystem.
|
||||||
cpusets.txt
|
|
||||||
- documents the cpusets feature; assign CPUs and Mem to a set of tasks.
|
|
||||||
cputopology.txt
|
cputopology.txt
|
||||||
- documentation on how CPU topology info is exported via sysfs.
|
- documentation on how CPU topology info is exported via sysfs.
|
||||||
cris/
|
cris/
|
||||||
|
|
71
Documentation/ABI/testing/debugfs-kmemtrace
Normal file
71
Documentation/ABI/testing/debugfs-kmemtrace
Normal file
|
@ -0,0 +1,71 @@
|
||||||
|
What: /sys/kernel/debug/kmemtrace/
|
||||||
|
Date: July 2008
|
||||||
|
Contact: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
|
||||||
|
Description:
|
||||||
|
|
||||||
|
In kmemtrace-enabled kernels, the following files are created:
|
||||||
|
|
||||||
|
/sys/kernel/debug/kmemtrace/
|
||||||
|
cpu<n> (0400) Per-CPU tracing data, see below. (binary)
|
||||||
|
total_overruns (0400) Total number of bytes which were dropped from
|
||||||
|
cpu<n> files because of full buffer condition,
|
||||||
|
non-binary. (text)
|
||||||
|
abi_version (0400) Kernel's kmemtrace ABI version. (text)
|
||||||
|
|
||||||
|
Each per-CPU file should be read according to the relay interface. That is,
|
||||||
|
the reader should set affinity to that specific CPU and, as currently done by
|
||||||
|
the userspace application (though there are other methods), use poll() with
|
||||||
|
an infinite timeout before every read(). Otherwise, erroneous data may be
|
||||||
|
read. The binary data has the following _core_ format:
|
||||||
|
|
||||||
|
Event ID (1 byte) Unsigned integer, one of:
|
||||||
|
0 - represents an allocation (KMEMTRACE_EVENT_ALLOC)
|
||||||
|
1 - represents a freeing of previously allocated memory
|
||||||
|
(KMEMTRACE_EVENT_FREE)
|
||||||
|
Type ID (1 byte) Unsigned integer, one of:
|
||||||
|
0 - this is a kmalloc() / kfree()
|
||||||
|
1 - this is a kmem_cache_alloc() / kmem_cache_free()
|
||||||
|
2 - this is a __get_free_pages() et al.
|
||||||
|
Event size (2 bytes) Unsigned integer representing the
|
||||||
|
size of this event. Used to extend
|
||||||
|
kmemtrace. Discard the bytes you
|
||||||
|
don't know about.
|
||||||
|
Sequence number (4 bytes) Signed integer used to reorder data
|
||||||
|
logged on SMP machines. Wraparound
|
||||||
|
must be taken into account, although
|
||||||
|
it is unlikely.
|
||||||
|
Caller address (8 bytes) Return address to the caller.
|
||||||
|
Pointer to mem (8 bytes) Pointer to target memory area. Can be
|
||||||
|
NULL, but not all such calls might be
|
||||||
|
recorded.
|
||||||
|
|
||||||
|
In case of KMEMTRACE_EVENT_ALLOC events, the next fields follow:
|
||||||
|
|
||||||
|
Requested bytes (8 bytes) Total number of requested bytes,
|
||||||
|
unsigned, must not be zero.
|
||||||
|
Allocated bytes (8 bytes) Total number of actually allocated
|
||||||
|
bytes, unsigned, must not be lower
|
||||||
|
than requested bytes.
|
||||||
|
Requested flags (4 bytes) GFP flags supplied by the caller.
|
||||||
|
Target CPU (4 bytes) Signed integer, valid for event id 1.
|
||||||
|
If equal to -1, target CPU is the same
|
||||||
|
as origin CPU, but the reverse might
|
||||||
|
not be true.
|
||||||
|
|
||||||
|
The data is made available in the same endianness the machine has.
|
||||||
|
|
||||||
|
Other event ids and type ids may be defined and added. Other fields may be
|
||||||
|
added by increasing event size, but see below for details.
|
||||||
|
Every modification to the ABI, including new id definitions, is followed
|
||||||
|
by bumping the ABI version by one.
|
||||||
|
|
||||||
|
Adding new data to the packet (features) is done at the end of the mandatory
|
||||||
|
data:
|
||||||
|
Feature size (2 bytes)
|
||||||
|
Feature ID (1 byte)
|
||||||
|
Feature data (Feature size - 3 bytes)
|
||||||
|
|
||||||
|
|
||||||
|
Users:
|
||||||
|
kmemtrace-user - git://repo.or.cz/kmemtrace-user.git
|
||||||
|
|
|
@ -4,8 +4,8 @@ KernelVersion: 2.6.26
|
||||||
Contact: Liam Girdwood <lrg@slimlogic.co.uk>
|
Contact: Liam Girdwood <lrg@slimlogic.co.uk>
|
||||||
Description:
|
Description:
|
||||||
Some regulator directories will contain a field called
|
Some regulator directories will contain a field called
|
||||||
state. This reports the regulator enable status, for
|
state. This reports the regulator enable control, for
|
||||||
regulators which can report that value.
|
regulators which can report that input value.
|
||||||
|
|
||||||
This will be one of the following strings:
|
This will be one of the following strings:
|
||||||
|
|
||||||
|
@ -14,16 +14,54 @@ Description:
|
||||||
'unknown'
|
'unknown'
|
||||||
|
|
||||||
'enabled' means the regulator output is ON and is supplying
|
'enabled' means the regulator output is ON and is supplying
|
||||||
power to the system.
|
power to the system (assuming no error prevents it).
|
||||||
|
|
||||||
'disabled' means the regulator output is OFF and is not
|
'disabled' means the regulator output is OFF and is not
|
||||||
supplying power to the system..
|
supplying power to the system (unless some non-Linux
|
||||||
|
control has enabled it).
|
||||||
|
|
||||||
'unknown' means software cannot determine the state, or
|
'unknown' means software cannot determine the state, or
|
||||||
the reported state is invalid.
|
the reported state is invalid.
|
||||||
|
|
||||||
NOTE: this field can be used in conjunction with microvolts
|
NOTE: this field can be used in conjunction with microvolts
|
||||||
and microamps to determine regulator output levels.
|
or microamps to determine configured regulator output levels.
|
||||||
|
|
||||||
|
|
||||||
|
What: /sys/class/regulator/.../status
|
||||||
|
Description:
|
||||||
|
Some regulator directories will contain a field called
|
||||||
|
"status". This reports the current regulator status, for
|
||||||
|
regulators which can report that output value.
|
||||||
|
|
||||||
|
This will be one of the following strings:
|
||||||
|
|
||||||
|
off
|
||||||
|
on
|
||||||
|
error
|
||||||
|
fast
|
||||||
|
normal
|
||||||
|
idle
|
||||||
|
standby
|
||||||
|
|
||||||
|
"off" means the regulator is not supplying power to the
|
||||||
|
system.
|
||||||
|
|
||||||
|
"on" means the regulator is supplying power to the system,
|
||||||
|
and the regulator can't report a detailed operation mode.
|
||||||
|
|
||||||
|
"error" indicates an out-of-regulation status such as being
|
||||||
|
disabled due to thermal shutdown, or voltage being unstable
|
||||||
|
because of problems with the input power supply.
|
||||||
|
|
||||||
|
"fast", "normal", "idle", and "standby" are all detailed
|
||||||
|
regulator operation modes (described elsewhere). They
|
||||||
|
imply "on", but provide more detail.
|
||||||
|
|
||||||
|
Note that regulator status is a function of many inputs,
|
||||||
|
not limited to control inputs from Linux. For example,
|
||||||
|
the actual load presented may trigger "error" status; or
|
||||||
|
a regulator may be enabled by another user, even though
|
||||||
|
Linux did not enable it.
|
||||||
|
|
||||||
|
|
||||||
What: /sys/class/regulator/.../type
|
What: /sys/class/regulator/.../type
|
||||||
|
@ -58,7 +96,7 @@ Description:
|
||||||
Some regulator directories will contain a field called
|
Some regulator directories will contain a field called
|
||||||
microvolts. This holds the regulator output voltage setting
|
microvolts. This holds the regulator output voltage setting
|
||||||
measured in microvolts (i.e. E-6 Volts), for regulators
|
measured in microvolts (i.e. E-6 Volts), for regulators
|
||||||
which can report that voltage.
|
which can report the control input for voltage.
|
||||||
|
|
||||||
NOTE: This value should not be used to determine the regulator
|
NOTE: This value should not be used to determine the regulator
|
||||||
output voltage level as this value is the same regardless of
|
output voltage level as this value is the same regardless of
|
||||||
|
@ -73,7 +111,7 @@ Description:
|
||||||
Some regulator directories will contain a field called
|
Some regulator directories will contain a field called
|
||||||
microamps. This holds the regulator output current limit
|
microamps. This holds the regulator output current limit
|
||||||
setting measured in microamps (i.e. E-6 Amps), for regulators
|
setting measured in microamps (i.e. E-6 Amps), for regulators
|
||||||
which can report that current.
|
which can report the control input for a current limit.
|
||||||
|
|
||||||
NOTE: This value should not be used to determine the regulator
|
NOTE: This value should not be used to determine the regulator
|
||||||
output current level as this value is the same regardless of
|
output current level as this value is the same regardless of
|
||||||
|
@ -87,7 +125,7 @@ Contact: Liam Girdwood <lrg@slimlogic.co.uk>
|
||||||
Description:
|
Description:
|
||||||
Some regulator directories will contain a field called
|
Some regulator directories will contain a field called
|
||||||
opmode. This holds the current regulator operating mode,
|
opmode. This holds the current regulator operating mode,
|
||||||
for regulators which can report it.
|
for regulators which can report that control input value.
|
||||||
|
|
||||||
The opmode value can be one of the following strings:
|
The opmode value can be one of the following strings:
|
||||||
|
|
||||||
|
@ -101,7 +139,8 @@ Description:
|
||||||
|
|
||||||
NOTE: This value should not be used to determine the regulator
|
NOTE: This value should not be used to determine the regulator
|
||||||
output operating mode as this value is the same regardless of
|
output operating mode as this value is the same regardless of
|
||||||
whether the regulator is enabled or disabled.
|
whether the regulator is enabled or disabled. A "status"
|
||||||
|
attribute may be available to determine the actual mode.
|
||||||
|
|
||||||
|
|
||||||
What: /sys/class/regulator/.../min_microvolts
|
What: /sys/class/regulator/.../min_microvolts
|
||||||
|
|
|
@ -136,7 +136,7 @@ exactly why.
|
||||||
The standard 32-bit addressing PCI device would do something like
|
The standard 32-bit addressing PCI device would do something like
|
||||||
this:
|
this:
|
||||||
|
|
||||||
if (pci_set_dma_mask(pdev, DMA_32BIT_MASK)) {
|
if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) {
|
||||||
printk(KERN_WARNING
|
printk(KERN_WARNING
|
||||||
"mydev: No suitable DMA available.\n");
|
"mydev: No suitable DMA available.\n");
|
||||||
goto ignore_this_device;
|
goto ignore_this_device;
|
||||||
|
@ -155,9 +155,9 @@ all 64-bits when accessing streaming DMA:
|
||||||
|
|
||||||
int using_dac;
|
int using_dac;
|
||||||
|
|
||||||
if (!pci_set_dma_mask(pdev, DMA_64BIT_MASK)) {
|
if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
|
||||||
using_dac = 1;
|
using_dac = 1;
|
||||||
} else if (!pci_set_dma_mask(pdev, DMA_32BIT_MASK)) {
|
} else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) {
|
||||||
using_dac = 0;
|
using_dac = 0;
|
||||||
} else {
|
} else {
|
||||||
printk(KERN_WARNING
|
printk(KERN_WARNING
|
||||||
|
@ -170,14 +170,14 @@ the case would look like this:
|
||||||
|
|
||||||
int using_dac, consistent_using_dac;
|
int using_dac, consistent_using_dac;
|
||||||
|
|
||||||
if (!pci_set_dma_mask(pdev, DMA_64BIT_MASK)) {
|
if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
|
||||||
using_dac = 1;
|
using_dac = 1;
|
||||||
consistent_using_dac = 1;
|
consistent_using_dac = 1;
|
||||||
pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK);
|
pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
|
||||||
} else if (!pci_set_dma_mask(pdev, DMA_32BIT_MASK)) {
|
} else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) {
|
||||||
using_dac = 0;
|
using_dac = 0;
|
||||||
consistent_using_dac = 0;
|
consistent_using_dac = 0;
|
||||||
pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
|
pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
|
||||||
} else {
|
} else {
|
||||||
printk(KERN_WARNING
|
printk(KERN_WARNING
|
||||||
"mydev: No suitable DMA available.\n");
|
"mydev: No suitable DMA available.\n");
|
||||||
|
@ -192,7 +192,7 @@ check the return value from pci_set_consistent_dma_mask().
|
||||||
Finally, if your device can only drive the low 24-bits of
|
Finally, if your device can only drive the low 24-bits of
|
||||||
address during PCI bus mastering you might do something like:
|
address during PCI bus mastering you might do something like:
|
||||||
|
|
||||||
if (pci_set_dma_mask(pdev, DMA_24BIT_MASK)) {
|
if (pci_set_dma_mask(pdev, DMA_BIT_MASK(24))) {
|
||||||
printk(KERN_WARNING
|
printk(KERN_WARNING
|
||||||
"mydev: 24-bit DMA addressing not available.\n");
|
"mydev: 24-bit DMA addressing not available.\n");
|
||||||
goto ignore_this_device;
|
goto ignore_this_device;
|
||||||
|
@ -213,7 +213,7 @@ most specific mask.
|
||||||
|
|
||||||
Here is pseudo-code showing how this might be done:
|
Here is pseudo-code showing how this might be done:
|
||||||
|
|
||||||
#define PLAYBACK_ADDRESS_BITS DMA_32BIT_MASK
|
#define PLAYBACK_ADDRESS_BITS DMA_BIT_MASK(32)
|
||||||
#define RECORD_ADDRESS_BITS 0x00ffffff
|
#define RECORD_ADDRESS_BITS 0x00ffffff
|
||||||
|
|
||||||
struct my_sound_card *card;
|
struct my_sound_card *card;
|
||||||
|
|
4
Documentation/DocBook/.gitignore
vendored
4
Documentation/DocBook/.gitignore
vendored
|
@ -4,3 +4,7 @@
|
||||||
*.html
|
*.html
|
||||||
*.9.gz
|
*.9.gz
|
||||||
*.9
|
*.9
|
||||||
|
*.aux
|
||||||
|
*.dvi
|
||||||
|
*.log
|
||||||
|
*.out
|
||||||
|
|
|
@ -259,7 +259,7 @@ X!Earch/x86/kernel/mca_32.c
|
||||||
!Eblock/blk-tag.c
|
!Eblock/blk-tag.c
|
||||||
!Iblock/blk-tag.c
|
!Iblock/blk-tag.c
|
||||||
!Eblock/blk-integrity.c
|
!Eblock/blk-integrity.c
|
||||||
!Iblock/blktrace.c
|
!Ikernel/trace/blktrace.c
|
||||||
!Iblock/genhd.c
|
!Iblock/genhd.c
|
||||||
!Eblock/genhd.c
|
!Eblock/genhd.c
|
||||||
</chapter>
|
</chapter>
|
||||||
|
|
|
@ -1137,8 +1137,8 @@
|
||||||
if (err < 0)
|
if (err < 0)
|
||||||
return err;
|
return err;
|
||||||
/* check PCI availability (28bit DMA) */
|
/* check PCI availability (28bit DMA) */
|
||||||
if (pci_set_dma_mask(pci, DMA_28BIT_MASK) < 0 ||
|
if (pci_set_dma_mask(pci, DMA_BIT_MASK(28)) < 0 ||
|
||||||
pci_set_consistent_dma_mask(pci, DMA_28BIT_MASK) < 0) {
|
pci_set_consistent_dma_mask(pci, DMA_BIT_MASK(28)) < 0) {
|
||||||
printk(KERN_ERR "error to set 28bit mask DMA\n");
|
printk(KERN_ERR "error to set 28bit mask DMA\n");
|
||||||
pci_disable_device(pci);
|
pci_disable_device(pci);
|
||||||
return -ENXIO;
|
return -ENXIO;
|
||||||
|
@ -1252,8 +1252,8 @@
|
||||||
err = pci_enable_device(pci);
|
err = pci_enable_device(pci);
|
||||||
if (err < 0)
|
if (err < 0)
|
||||||
return err;
|
return err;
|
||||||
if (pci_set_dma_mask(pci, DMA_28BIT_MASK) < 0 ||
|
if (pci_set_dma_mask(pci, DMA_BIT_MASK(28)) < 0 ||
|
||||||
pci_set_consistent_dma_mask(pci, DMA_28BIT_MASK) < 0) {
|
pci_set_consistent_dma_mask(pci, DMA_BIT_MASK(28)) < 0) {
|
||||||
printk(KERN_ERR "error to set 28bit mask DMA\n");
|
printk(KERN_ERR "error to set 28bit mask DMA\n");
|
||||||
pci_disable_device(pci);
|
pci_disable_device(pci);
|
||||||
return -ENXIO;
|
return -ENXIO;
|
||||||
|
|
|
@ -118,7 +118,7 @@ Following are the RCU equivalents for these two functions:
|
||||||
list_for_each_entry(e, list, list) {
|
list_for_each_entry(e, list, list) {
|
||||||
if (!audit_compare_rule(rule, &e->rule)) {
|
if (!audit_compare_rule(rule, &e->rule)) {
|
||||||
list_del_rcu(&e->list);
|
list_del_rcu(&e->list);
|
||||||
call_rcu(&e->rcu, audit_free_rule, e);
|
call_rcu(&e->rcu, audit_free_rule);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -206,7 +206,7 @@ RCU ("read-copy update") its name. The RCU code is as follows:
|
||||||
ne->rule.action = newaction;
|
ne->rule.action = newaction;
|
||||||
ne->rule.file_count = newfield_count;
|
ne->rule.file_count = newfield_count;
|
||||||
list_replace_rcu(e, ne);
|
list_replace_rcu(e, ne);
|
||||||
call_rcu(&e->rcu, audit_free_rule, e);
|
call_rcu(&e->rcu, audit_free_rule);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -283,7 +283,7 @@ flag under the spinlock as follows:
|
||||||
list_del_rcu(&e->list);
|
list_del_rcu(&e->list);
|
||||||
e->deleted = 1;
|
e->deleted = 1;
|
||||||
spin_unlock(&e->lock);
|
spin_unlock(&e->lock);
|
||||||
call_rcu(&e->rcu, audit_free_rule, e);
|
call_rcu(&e->rcu, audit_free_rule);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,7 +21,7 @@ if (obj) {
|
||||||
/*
|
/*
|
||||||
* Because a writer could delete object, and a writer could
|
* Because a writer could delete object, and a writer could
|
||||||
* reuse these object before the RCU grace period, we
|
* reuse these object before the RCU grace period, we
|
||||||
* must check key after geting the reference on object
|
* must check key after getting the reference on object
|
||||||
*/
|
*/
|
||||||
if (obj->key != key) { // not the object we expected
|
if (obj->key != key) { // not the object we expected
|
||||||
put_ref(obj);
|
put_ref(obj);
|
||||||
|
@ -117,7 +117,7 @@ a race (some writer did a delete and/or a move of an object
|
||||||
to another chain) checking the final 'nulls' value if
|
to another chain) checking the final 'nulls' value if
|
||||||
the lookup met the end of chain. If final 'nulls' value
|
the lookup met the end of chain. If final 'nulls' value
|
||||||
is not the slot number, then we must restart the lookup at
|
is not the slot number, then we must restart the lookup at
|
||||||
the begining. If the object was moved to same chain,
|
the beginning. If the object was moved to the same chain,
|
||||||
then the reader doesnt care : It might eventually
|
then the reader doesnt care : It might eventually
|
||||||
scan the list again without harm.
|
scan the list again without harm.
|
||||||
|
|
||||||
|
|
|
@ -8,6 +8,8 @@ cpqarray.txt
|
||||||
- info on using Compaq's SMART2 Intelligent Disk Array Controllers.
|
- info on using Compaq's SMART2 Intelligent Disk Array Controllers.
|
||||||
floppy.txt
|
floppy.txt
|
||||||
- notes and driver options for the floppy disk driver.
|
- notes and driver options for the floppy disk driver.
|
||||||
|
mflash.txt
|
||||||
|
- info on mGine m(g)flash driver for linux.
|
||||||
nbd.txt
|
nbd.txt
|
||||||
- info on a TCP implementation of a network block device.
|
- info on a TCP implementation of a network block device.
|
||||||
paride.txt
|
paride.txt
|
||||||
|
|
84
Documentation/blockdev/mflash.txt
Normal file
84
Documentation/blockdev/mflash.txt
Normal file
|
@ -0,0 +1,84 @@
|
||||||
|
This document describes m[g]flash support in linux.
|
||||||
|
|
||||||
|
Contents
|
||||||
|
1. Overview
|
||||||
|
2. Reserved area configuration
|
||||||
|
3. Example of mflash platform driver registration
|
||||||
|
|
||||||
|
1. Overview
|
||||||
|
|
||||||
|
Mflash and gflash are embedded flash drives. The only difference is that mflash is
|
||||||
|
an MCP (Multi Chip Package) device. These two devices operate in exactly the same way.
|
||||||
|
So in the rest of this document, "mflash" represents both mflash and gflash.
|
||||||
|
|
||||||
|
Internally, mflash has nand flash and other hardware logics and supports
|
||||||
|
2 different operation (ATA, IO) modes. ATA mode doesn't need any new
|
||||||
|
driver and currently works well under standard IDE subsystem. Actually it's
|
||||||
|
one chip SSD. IO mode is ATA-like custom mode for the host that doesn't have
|
||||||
|
IDE interface.
|
||||||
|
|
||||||
|
The following are brief descriptions of IO mode.
|
||||||
|
A. IO mode is based on the ATA protocol and uses some custom commands (read confirm,
|
||||||
|
write confirm)
|
||||||
|
B. IO mode uses SRAM bus interface.
|
||||||
|
C. IO mode supports 4kB boot area, so host can boot from mflash.
|
||||||
|
|
||||||
|
2. Reserved area configuration
|
||||||
|
If the host boots from mflash, it usually needs a raw area for the boot loader image. All of
|
||||||
|
the mflash's block device operations will take this value as the start offset.
|
||||||
|
Note that the boot loader's reserved area size and the kernel configuration value
|
||||||
|
must be the same.
|
||||||
|
|
||||||
|
3. Example of mflash platform driver registration
|
||||||
|
Getting mflash working is very straightforward. Adding platform device stuff to the board
|
||||||
|
configuration file is all that is needed. Here is a pseudo example.
|
||||||
|
|
||||||
|
static struct mg_drv_data mflash_drv_data = {
|
||||||
|
/* Set to 1 if you want the driver to use polling */
|
||||||
|
.use_polling = 0,
|
||||||
|
/* device attribution */
|
||||||
|
.dev_attr = MG_BOOT_DEV
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct resource mg_mflash_rsc[] = {
|
||||||
|
/* Base address of mflash */
|
||||||
|
[0] = {
|
||||||
|
.start = 0x08000000,
|
||||||
|
.end = 0x08000000 + SZ_64K - 1,
|
||||||
|
.flags = IORESOURCE_MEM
|
||||||
|
},
|
||||||
|
/* mflash interrupt pin */
|
||||||
|
[1] = {
|
||||||
|
.start = IRQ_GPIO(84),
|
||||||
|
.end = IRQ_GPIO(84),
|
||||||
|
.flags = IORESOURCE_IRQ
|
||||||
|
},
|
||||||
|
/* mflash reset pin */
|
||||||
|
[2] = {
|
||||||
|
.start = 43,
|
||||||
|
.end = 43,
|
||||||
|
.name = MG_RST_PIN,
|
||||||
|
.flags = IORESOURCE_IO
|
||||||
|
},
|
||||||
|
/* mflash reset-out pin
|
||||||
|
* If you use mflash as storage device (i.e. other than MG_BOOT_DEV),
|
||||||
|
* should assign this */
|
||||||
|
[3] = {
|
||||||
|
.start = 51,
|
||||||
|
.end = 51,
|
||||||
|
.name = MG_RSTOUT_PIN,
|
||||||
|
.flags = IORESOURCE_IO
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct platform_device mflash_dev = {
|
||||||
|
.name = MG_DEV_NAME,
|
||||||
|
.id = -1,
|
||||||
|
.dev = {
|
||||||
|
.platform_data = &mflash_drv_data,
|
||||||
|
},
|
||||||
|
.num_resources = ARRAY_SIZE(mg_mflash_rsc),
|
||||||
|
.resource = mg_mflash_rsc
|
||||||
|
};
|
||||||
|
|
||||||
|
platform_device_register(&mflash_dev);
|
18
Documentation/cgroups/00-INDEX
Normal file
18
Documentation/cgroups/00-INDEX
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
00-INDEX
|
||||||
|
- this file
|
||||||
|
cgroups.txt
|
||||||
|
- Control Groups definition, implementation details, examples and API.
|
||||||
|
cpuacct.txt
|
||||||
|
- CPU Accounting Controller; account CPU usage for groups of tasks.
|
||||||
|
cpusets.txt
|
||||||
|
- documents the cpusets feature; assign CPUs and Mem to a set of tasks.
|
||||||
|
devices.txt
|
||||||
|
- Device Whitelist Controller; description, interface and security.
|
||||||
|
freezer-subsystem.txt
|
||||||
|
- checkpointing; rationale to not use signals, interface.
|
||||||
|
memcg_test.txt
|
||||||
|
- Memory Resource Controller; implementation details.
|
||||||
|
memory.txt
|
||||||
|
- Memory Resource Controller; design, accounting, interface, testing.
|
||||||
|
resource_counter.txt
|
||||||
|
- Resource Counter API.
|
|
@ -56,7 +56,7 @@ hierarchy, and a set of subsystems; each subsystem has system-specific
|
||||||
state attached to each cgroup in the hierarchy. Each hierarchy has
|
state attached to each cgroup in the hierarchy. Each hierarchy has
|
||||||
an instance of the cgroup virtual filesystem associated with it.
|
an instance of the cgroup virtual filesystem associated with it.
|
||||||
|
|
||||||
At any one time there may be multiple active hierachies of task
|
At any one time there may be multiple active hierarchies of task
|
||||||
cgroups. Each hierarchy is a partition of all tasks in the system.
|
cgroups. Each hierarchy is a partition of all tasks in the system.
|
||||||
|
|
||||||
User level code may create and destroy cgroups by name in an
|
User level code may create and destroy cgroups by name in an
|
||||||
|
@ -124,10 +124,10 @@ following lines:
|
||||||
/ \
|
/ \
|
||||||
Prof (15%) students (5%)
|
Prof (15%) students (5%)
|
||||||
|
|
||||||
Browsers like firefox/lynx go into the WWW network class, while (k)nfsd go
|
Browsers like Firefox/Lynx go into the WWW network class, while (k)nfsd go
|
||||||
into NFS network class.
|
into NFS network class.
|
||||||
|
|
||||||
At the same time firefox/lynx will share an appropriate CPU/Memory class
|
At the same time Firefox/Lynx will share an appropriate CPU/Memory class
|
||||||
depending on who launched it (prof/student).
|
depending on who launched it (prof/student).
|
||||||
|
|
||||||
With the ability to classify tasks differently for different resources
|
With the ability to classify tasks differently for different resources
|
||||||
|
@ -325,7 +325,7 @@ and then start a subshell 'sh' in that cgroup:
|
||||||
Creating, modifying, using the cgroups can be done through the cgroup
|
Creating, modifying, using the cgroups can be done through the cgroup
|
||||||
virtual filesystem.
|
virtual filesystem.
|
||||||
|
|
||||||
To mount a cgroup hierarchy will all available subsystems, type:
|
To mount a cgroup hierarchy with all available subsystems, type:
|
||||||
# mount -t cgroup xxx /dev/cgroup
|
# mount -t cgroup xxx /dev/cgroup
|
||||||
|
|
||||||
The "xxx" is not interpreted by the cgroup code, but will appear in
|
The "xxx" is not interpreted by the cgroup code, but will appear in
|
||||||
|
@ -333,12 +333,23 @@ The "xxx" is not interpreted by the cgroup code, but will appear in
|
||||||
|
|
||||||
To mount a cgroup hierarchy with just the cpuset and numtasks
|
To mount a cgroup hierarchy with just the cpuset and memory
|
||||||
subsystems, type:
|
subsystems, type:
|
||||||
# mount -t cgroup -o cpuset,numtasks hier1 /dev/cgroup
|
# mount -t cgroup -o cpuset,memory hier1 /dev/cgroup
|
||||||
|
|
||||||
To change the set of subsystems bound to a mounted hierarchy, just
|
To change the set of subsystems bound to a mounted hierarchy, just
|
||||||
remount with different options:
|
remount with different options:
|
||||||
|
# mount -o remount,cpuset,ns hier1 /dev/cgroup
|
||||||
|
|
||||||
# mount -o remount,cpuset,ns /dev/cgroup
|
Now memory is removed from the hierarchy and ns is added.
|
||||||
|
|
||||||
|
Note this will add ns to the hierarchy but won't remove memory or
|
||||||
|
cpuset, because the new options are appended to the old ones:
|
||||||
|
# mount -o remount,ns /dev/cgroup
|
||||||
|
|
||||||
|
To specify a hierarchy's release_agent:
|
||||||
|
# mount -t cgroup -o cpuset,release_agent="/sbin/cpuset_release_agent" \
|
||||||
|
xxx /dev/cgroup
|
||||||
|
|
||||||
|
Note that specifying 'release_agent' more than once will return failure.
|
||||||
|
|
||||||
Note that changing the set of subsystems is currently only supported
|
Note that changing the set of subsystems is currently only supported
|
||||||
when the hierarchy consists of a single (root) cgroup. Supporting
|
when the hierarchy consists of a single (root) cgroup. Supporting
|
||||||
|
@ -349,6 +360,11 @@ Then under /dev/cgroup you can find a tree that corresponds to the
|
||||||
tree of the cgroups in the system. For instance, /dev/cgroup
|
tree of the cgroups in the system. For instance, /dev/cgroup
|
||||||
is the cgroup that holds the whole system.
|
is the cgroup that holds the whole system.
|
||||||
|
|
||||||
|
If you want to change the value of release_agent:
|
||||||
|
# echo "/sbin/new_release_agent" > /dev/cgroup/release_agent
|
||||||
|
|
||||||
|
It can also be changed via remount.
|
||||||
|
|
||||||
If you want to create a new cgroup under /dev/cgroup:
|
If you want to create a new cgroup under /dev/cgroup:
|
||||||
# cd /dev/cgroup
|
# cd /dev/cgroup
|
||||||
# mkdir my_cgroup
|
# mkdir my_cgroup
|
||||||
|
@ -476,11 +492,13 @@ cgroup->parent is still valid. (Note - can also be called for a
|
||||||
newly-created cgroup if an error occurs after this subsystem's
|
newly-created cgroup if an error occurs after this subsystem's
|
||||||
create() method has been called for the new cgroup).
|
create() method has been called for the new cgroup).
|
||||||
|
|
||||||
void pre_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp);
|
int pre_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp);
|
||||||
|
|
||||||
Called before checking the reference count on each subsystem. This may
|
Called before checking the reference count on each subsystem. This may
|
||||||
be useful for subsystems which have some extra references even if
|
be useful for subsystems which have some extra references even if
|
||||||
there are not tasks in the cgroup.
|
there are no tasks in the cgroup. If pre_destroy() returns an error code,
|
||||||
|
rmdir() will fail with it. Because of this behavior, pre_destroy() can be
|
||||||
|
called multiple times against a cgroup.
|
||||||
|
|
||||||
int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
|
int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
|
||||||
struct task_struct *task)
|
struct task_struct *task)
|
||||||
|
@ -521,7 +539,7 @@ always handled well.
|
||||||
void post_clone(struct cgroup_subsys *ss, struct cgroup *cgrp)
|
void post_clone(struct cgroup_subsys *ss, struct cgroup *cgrp)
|
||||||
(cgroup_mutex held by caller)
|
(cgroup_mutex held by caller)
|
||||||
|
|
||||||
Called at the end of cgroup_clone() to do any paramater
|
Called at the end of cgroup_clone() to do any parameter
|
||||||
initialization which might be required before a task could attach. For
|
initialization which might be required before a task could attach. For
|
||||||
example in cpusets, no task may attach before 'cpus' and 'mems' are set
|
example in cpusets, no task may attach before 'cpus' and 'mems' are set
|
||||||
up.
|
up.
|
||||||
|
|
|
@ -131,7 +131,7 @@ Cpusets extends these two mechanisms as follows:
|
||||||
- The hierarchy of cpusets can be mounted at /dev/cpuset, for
|
- The hierarchy of cpusets can be mounted at /dev/cpuset, for
|
||||||
browsing and manipulation from user space.
|
browsing and manipulation from user space.
|
||||||
- A cpuset may be marked exclusive, which ensures that no other
|
- A cpuset may be marked exclusive, which ensures that no other
|
||||||
cpuset (except direct ancestors and descendents) may contain
|
cpuset (except direct ancestors and descendants) may contain
|
||||||
any overlapping CPUs or Memory Nodes.
|
any overlapping CPUs or Memory Nodes.
|
||||||
- You can list all the tasks (by pid) attached to any cpuset.
|
- You can list all the tasks (by pid) attached to any cpuset.
|
||||||
|
|
||||||
|
@ -226,7 +226,7 @@ nodes with memory--using the cpuset_track_online_nodes() hook.
|
||||||
--------------------------------
|
--------------------------------
|
||||||
|
|
||||||
If a cpuset is cpu or mem exclusive, no other cpuset, other than
|
If a cpuset is cpu or mem exclusive, no other cpuset, other than
|
||||||
a direct ancestor or descendent, may share any of the same CPUs or
|
a direct ancestor or descendant, may share any of the same CPUs or
|
||||||
Memory Nodes.
|
Memory Nodes.
|
||||||
|
|
||||||
A cpuset that is mem_exclusive *or* mem_hardwall is "hardwalled",
|
A cpuset that is mem_exclusive *or* mem_hardwall is "hardwalled",
|
||||||
|
@ -427,7 +427,7 @@ child cpusets have this flag enabled.
|
||||||
When doing this, you don't usually want to leave any unpinned tasks in
|
When doing this, you don't usually want to leave any unpinned tasks in
|
||||||
the top cpuset that might use non-trivial amounts of CPU, as such tasks
|
the top cpuset that might use non-trivial amounts of CPU, as such tasks
|
||||||
may be artificially constrained to some subset of CPUs, depending on
|
may be artificially constrained to some subset of CPUs, depending on
|
||||||
the particulars of this flag setting in descendent cpusets. Even if
|
the particulars of this flag setting in descendant cpusets. Even if
|
||||||
such a task could use spare CPU cycles in some other CPUs, the kernel
|
such a task could use spare CPU cycles in some other CPUs, the kernel
|
||||||
scheduler might not consider the possibility of load balancing that
|
scheduler might not consider the possibility of load balancing that
|
||||||
task to that underused CPU.
|
task to that underused CPU.
|
||||||
|
@ -533,7 +533,7 @@ Of course it takes some searching cost to find movable tasks and/or
|
||||||
idle CPUs, the scheduler might not search all CPUs in the domain
|
idle CPUs, the scheduler might not search all CPUs in the domain
|
||||||
every time. In fact, in some architectures, the searching ranges on
|
every time. In fact, in some architectures, the searching ranges on
|
||||||
events are limited in the same socket or node where the CPU is located,
|
events are limited in the same socket or node where the CPU is located,
|
||||||
while the load balance on tick searchs all.
|
while the load balance on tick searches all.
|
||||||
|
|
||||||
For example, assume CPU Z is relatively far from CPU X. Even if CPU Z
|
For example, assume CPU Z is relatively far from CPU X. Even if CPU Z
|
||||||
is idle while CPU X and the siblings are busy, scheduler can't migrate
|
is idle while CPU X and the siblings are busy, scheduler can't migrate
|
||||||
|
@ -601,7 +601,7 @@ its new cpuset, then the task will continue to use whatever subset
|
||||||
of MPOL_BIND nodes are still allowed in the new cpuset. If the task
|
of MPOL_BIND nodes are still allowed in the new cpuset. If the task
|
||||||
was using MPOL_BIND and now none of its MPOL_BIND nodes are allowed
|
was using MPOL_BIND and now none of its MPOL_BIND nodes are allowed
|
||||||
in the new cpuset, then the task will be essentially treated as if it
|
in the new cpuset, then the task will be essentially treated as if it
|
||||||
was MPOL_BIND bound to the new cpuset (even though its numa placement,
|
was MPOL_BIND bound to the new cpuset (even though its NUMA placement,
|
||||||
as queried by get_mempolicy(), doesn't change). If a task is moved
|
as queried by get_mempolicy(), doesn't change). If a task is moved
|
||||||
from one cpuset to another, then the kernel will adjust the task's
|
from one cpuset to another, then the kernel will adjust the task's
|
||||||
memory placement, as above, the next time that the kernel attempts
|
memory placement, as above, the next time that the kernel attempts
|
||||||
|
|
|
@ -42,7 +42,7 @@ suffice, but we can decide the best way to adequately restrict
|
||||||
movement as people get some experience with this. We may just want
|
movement as people get some experience with this. We may just want
|
||||||
to require CAP_SYS_ADMIN, which at least is a separate bit from
|
to require CAP_SYS_ADMIN, which at least is a separate bit from
|
||||||
CAP_MKNOD. We may want to just refuse moving to a cgroup which
|
CAP_MKNOD. We may want to just refuse moving to a cgroup which
|
||||||
isn't a descendent of the current one. Or we may want to use
|
isn't a descendant of the current one. Or we may want to use
|
||||||
CAP_MAC_ADMIN, since we really are trying to lock down root.
|
CAP_MAC_ADMIN, since we really are trying to lock down root.
|
||||||
|
|
||||||
CAP_SYS_ADMIN is needed to modify the whitelist or move another
|
CAP_SYS_ADMIN is needed to modify the whitelist or move another
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
Memory Resource Controller(Memcg) Implementation Memo.
|
Memory Resource Controller(Memcg) Implementation Memo.
|
||||||
Last Updated: 2009/1/19
|
Last Updated: 2009/1/20
|
||||||
Base Kernel Version: based on 2.6.29-rc2.
|
Base Kernel Version: based on 2.6.29-rc2.
|
||||||
|
|
||||||
Because VM is getting complex (one of reasons is memcg...), memcg's behavior
|
Because VM is getting complex (one of reasons is memcg...), memcg's behavior
|
||||||
|
@ -356,7 +356,25 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
|
||||||
(Shell-B)
|
(Shell-B)
|
||||||
# move all tasks in /cgroup/test to /cgroup
|
# move all tasks in /cgroup/test to /cgroup
|
||||||
# /sbin/swapoff -a
|
# /sbin/swapoff -a
|
||||||
# rmdir /test/cgroup
|
# rmdir /cgroup/test
|
||||||
# kill malloc task.
|
# kill malloc task.
|
||||||
|
|
||||||
Of course, tmpfs vs. swapoff test should be tested, too.
|
Of course, tmpfs vs. swapoff test should be tested, too.
|
||||||
|
|
||||||
|
9.8 OOM-Killer
|
||||||
|
Out-of-memory caused by memcg's limit will kill tasks under
|
||||||
|
the memcg. When hierarchy is used, a task under hierarchy
|
||||||
|
will be killed by the kernel.
|
||||||
|
In this case, panic_on_oom shouldn't be invoked and tasks
|
||||||
|
in other groups shouldn't be killed.
|
||||||
|
|
||||||
|
It's not difficult to cause OOM under memcg, as follows.
|
||||||
|
Case A) when you can swapoff
|
||||||
|
#swapoff -a
|
||||||
|
#echo 50M > /memory.limit_in_bytes
|
||||||
|
run 51M of malloc
|
||||||
|
|
||||||
|
Case B) when you use mem+swap limitation.
|
||||||
|
#echo 50M > memory.limit_in_bytes
|
||||||
|
#echo 50M > memory.memsw.limit_in_bytes
|
||||||
|
run 51M of malloc
|
||||||
|
|
|
@ -302,7 +302,7 @@ will be charged as a new owner of it.
|
||||||
unevictable - # of pages that cannot be reclaimed (mlocked etc.)
|
unevictable - # of pages that cannot be reclaimed (mlocked etc.)
|
||||||
|
|
||||||
The items below depend on CONFIG_DEBUG_VM.
|
The items below depend on CONFIG_DEBUG_VM.
|
||||||
inactive_ratio - VM inernal parameter. (see mm/page_alloc.c)
|
inactive_ratio - VM internal parameter. (see mm/page_alloc.c)
|
||||||
recent_rotated_anon - VM internal parameter. (see mm/vmscan.c)
|
recent_rotated_anon - VM internal parameter. (see mm/vmscan.c)
|
||||||
recent_rotated_file - VM internal parameter. (see mm/vmscan.c)
|
recent_rotated_file - VM internal parameter. (see mm/vmscan.c)
|
||||||
recent_scanned_anon - VM internal parameter. (see mm/vmscan.c)
|
recent_scanned_anon - VM internal parameter. (see mm/vmscan.c)
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
|
|
||||||
LINUX ALLOCATED DEVICES (2.6+ version)
|
LINUX ALLOCATED DEVICES (2.6+ version)
|
||||||
|
|
||||||
Maintained by Torben Mathiasen <device@lanana.org>
|
Maintained by Alan Cox <device@lanana.org>
|
||||||
|
|
||||||
Last revised: 29 November 2006
|
Last revised: 6th April 2009
|
||||||
|
|
||||||
This list is the Linux Device List, the official registry of allocated
|
This list is the Linux Device List, the official registry of allocated
|
||||||
device numbers and /dev directory nodes for the Linux operating
|
device numbers and /dev directory nodes for the Linux operating
|
||||||
|
@ -67,6 +67,11 @@ up to date. Due to the number of registrations I have to maintain it
|
||||||
in "batch mode", so there are likely additional registrations that
|
in "batch mode", so there are likely additional registrations that
|
||||||
haven't been listed yet.
|
haven't been listed yet.
|
||||||
|
|
||||||
|
Fourth, remember that Linux now has extensive support for dynamic allocation
|
||||||
|
of device numbering and can use sysfs and udev to handle the naming needs.
|
||||||
|
There are still some exceptions in the serial and boot device area. Before
|
||||||
|
asking for a device number make sure you actually need one.
|
||||||
|
|
||||||
Finally, sometimes I have to play "namespace police." Please don't be
|
Finally, sometimes I have to play "namespace police." Please don't be
|
||||||
offended. I often get submissions for /dev names that would be bound
|
offended. I often get submissions for /dev names that would be bound
|
||||||
to cause conflicts down the road. I am trying to avoid getting in a
|
to cause conflicts down the road. I am trying to avoid getting in a
|
||||||
|
@ -101,7 +106,7 @@ Your cooperation is appreciated.
|
||||||
0 = /dev/ram0 First RAM disk
|
0 = /dev/ram0 First RAM disk
|
||||||
1 = /dev/ram1 Second RAM disk
|
1 = /dev/ram1 Second RAM disk
|
||||||
...
|
...
|
||||||
250 = /dev/initrd Initial RAM disk {2.6}
|
250 = /dev/initrd Initial RAM disk
|
||||||
|
|
||||||
Older kernels had /dev/ramdisk (1, 1) here.
|
Older kernels had /dev/ramdisk (1, 1) here.
|
||||||
/dev/initrd refers to a RAM disk which was preloaded
|
/dev/initrd refers to a RAM disk which was preloaded
|
||||||
|
@ -340,7 +345,7 @@ Your cooperation is appreciated.
|
||||||
14 = /dev/touchscreen/ucb1x00 UCB 1x00 touchscreen
|
14 = /dev/touchscreen/ucb1x00 UCB 1x00 touchscreen
|
||||||
15 = /dev/touchscreen/mk712 MK712 touchscreen
|
15 = /dev/touchscreen/mk712 MK712 touchscreen
|
||||||
128 = /dev/beep Fancy beep device
|
128 = /dev/beep Fancy beep device
|
||||||
129 = /dev/modreq Kernel module load request {2.6}
|
129 =
|
||||||
130 = /dev/watchdog Watchdog timer port
|
130 = /dev/watchdog Watchdog timer port
|
||||||
131 = /dev/temperature Machine internal temperature
|
131 = /dev/temperature Machine internal temperature
|
||||||
132 = /dev/hwtrap Hardware fault trap
|
132 = /dev/hwtrap Hardware fault trap
|
||||||
|
@ -350,10 +355,10 @@ Your cooperation is appreciated.
|
||||||
139 = /dev/openprom SPARC OpenBoot PROM
|
139 = /dev/openprom SPARC OpenBoot PROM
|
||||||
140 = /dev/relay8 Berkshire Products Octal relay card
|
140 = /dev/relay8 Berkshire Products Octal relay card
|
||||||
141 = /dev/relay16 Berkshire Products ISO-16 relay card
|
141 = /dev/relay16 Berkshire Products ISO-16 relay card
|
||||||
142 = /dev/msr x86 model-specific registers {2.6}
|
142 =
|
||||||
143 = /dev/pciconf PCI configuration space
|
143 = /dev/pciconf PCI configuration space
|
||||||
144 = /dev/nvram Non-volatile configuration RAM
|
144 = /dev/nvram Non-volatile configuration RAM
|
||||||
145 = /dev/hfmodem Soundcard shortwave modem control {2.6}
|
145 = /dev/hfmodem Soundcard shortwave modem control
|
||||||
146 = /dev/graphics Linux/SGI graphics device
|
146 = /dev/graphics Linux/SGI graphics device
|
||||||
147 = /dev/opengl Linux/SGI OpenGL pipe
|
147 = /dev/opengl Linux/SGI OpenGL pipe
|
||||||
148 = /dev/gfx Linux/SGI graphics effects device
|
148 = /dev/gfx Linux/SGI graphics effects device
|
||||||
|
@ -435,6 +440,9 @@ Your cooperation is appreciated.
|
||||||
228 = /dev/hpet HPET driver
|
228 = /dev/hpet HPET driver
|
||||||
229 = /dev/fuse Fuse (virtual filesystem in user-space)
|
229 = /dev/fuse Fuse (virtual filesystem in user-space)
|
||||||
230 = /dev/midishare MidiShare driver
|
230 = /dev/midishare MidiShare driver
|
||||||
|
231 = /dev/snapshot System memory snapshot device
|
||||||
|
232 = /dev/kvm Kernel-based virtual machine (hardware virtualization extensions)
|
||||||
|
233 = /dev/kmview View-OS A process with a view
|
||||||
240-254 Reserved for local use
|
240-254 Reserved for local use
|
||||||
255 Reserved for MISC_DYNAMIC_MINOR
|
255 Reserved for MISC_DYNAMIC_MINOR
|
||||||
|
|
||||||
|
@ -466,10 +474,7 @@ Your cooperation is appreciated.
|
||||||
The device names specified are proposed -- if there
|
The device names specified are proposed -- if there
|
||||||
are "standard" names for these devices, please let me know.
|
are "standard" names for these devices, please let me know.
|
||||||
|
|
||||||
12 block MSCDEX CD-ROM callback support {2.6}
|
12 block
|
||||||
0 = /dev/dos_cd0 First MSCDEX CD-ROM
|
|
||||||
1 = /dev/dos_cd1 Second MSCDEX CD-ROM
|
|
||||||
...
|
|
||||||
|
|
||||||
13 char Input core
|
13 char Input core
|
||||||
0 = /dev/input/js0 First joystick
|
0 = /dev/input/js0 First joystick
|
||||||
|
@ -498,7 +503,7 @@ Your cooperation is appreciated.
|
||||||
2 = /dev/midi00 First MIDI port
|
2 = /dev/midi00 First MIDI port
|
||||||
3 = /dev/dsp Digital audio
|
3 = /dev/dsp Digital audio
|
||||||
4 = /dev/audio Sun-compatible digital audio
|
4 = /dev/audio Sun-compatible digital audio
|
||||||
6 = /dev/sndstat Sound card status information {2.6}
|
6 =
|
||||||
7 = /dev/audioctl SPARC audio control device
|
7 = /dev/audioctl SPARC audio control device
|
||||||
8 = /dev/sequencer2 Sequencer -- alternate device
|
8 = /dev/sequencer2 Sequencer -- alternate device
|
||||||
16 = /dev/mixer1 Second soundcard mixer control
|
16 = /dev/mixer1 Second soundcard mixer control
|
||||||
|
@ -510,14 +515,7 @@ Your cooperation is appreciated.
|
||||||
34 = /dev/midi02 Third MIDI port
|
34 = /dev/midi02 Third MIDI port
|
||||||
50 = /dev/midi03 Fourth MIDI port
|
50 = /dev/midi03 Fourth MIDI port
|
||||||
|
|
||||||
14 block BIOS harddrive callback support {2.6}
|
14 block
|
||||||
0 = /dev/dos_hda First BIOS harddrive whole disk
|
|
||||||
64 = /dev/dos_hdb Second BIOS harddrive whole disk
|
|
||||||
128 = /dev/dos_hdc Third BIOS harddrive whole disk
|
|
||||||
192 = /dev/dos_hdd Fourth BIOS harddrive whole disk
|
|
||||||
|
|
||||||
Partitions are handled in the same way as IDE disks
|
|
||||||
(see major number 3).
|
|
||||||
|
|
||||||
15 char Joystick
|
15 char Joystick
|
||||||
0 = /dev/js0 First analog joystick
|
0 = /dev/js0 First analog joystick
|
||||||
|
@ -535,14 +533,14 @@ Your cooperation is appreciated.
|
||||||
16 block GoldStar CD-ROM
|
16 block GoldStar CD-ROM
|
||||||
0 = /dev/gscd GoldStar CD-ROM
|
0 = /dev/gscd GoldStar CD-ROM
|
||||||
|
|
||||||
17 char Chase serial card
|
17 char OBSOLETE (was Chase serial card)
|
||||||
0 = /dev/ttyH0 First Chase port
|
0 = /dev/ttyH0 First Chase port
|
||||||
1 = /dev/ttyH1 Second Chase port
|
1 = /dev/ttyH1 Second Chase port
|
||||||
...
|
...
|
||||||
17 block Optics Storage CD-ROM
|
17 block Optics Storage CD-ROM
|
||||||
0 = /dev/optcd Optics Storage CD-ROM
|
0 = /dev/optcd Optics Storage CD-ROM
|
||||||
|
|
||||||
18 char Chase serial card - alternate devices
|
18 char OBSOLETE (was Chase serial card - alternate devices)
|
||||||
0 = /dev/cuh0 Callout device for ttyH0
|
0 = /dev/cuh0 Callout device for ttyH0
|
||||||
1 = /dev/cuh1 Callout device for ttyH1
|
1 = /dev/cuh1 Callout device for ttyH1
|
||||||
...
|
...
|
||||||
|
@ -644,8 +642,7 @@ Your cooperation is appreciated.
|
||||||
2 = /dev/sbpcd2 Panasonic CD-ROM controller 0 unit 2
|
2 = /dev/sbpcd2 Panasonic CD-ROM controller 0 unit 2
|
||||||
3 = /dev/sbpcd3 Panasonic CD-ROM controller 0 unit 3
|
3 = /dev/sbpcd3 Panasonic CD-ROM controller 0 unit 3
|
||||||
|
|
||||||
26 char Quanta WinVision frame grabber {2.6}
|
26 char
|
||||||
0 = /dev/wvisfgrab Quanta WinVision frame grabber
|
|
||||||
|
|
||||||
26 block Second Matsushita (Panasonic/SoundBlaster) CD-ROM
|
26 block Second Matsushita (Panasonic/SoundBlaster) CD-ROM
|
||||||
0 = /dev/sbpcd4 Panasonic CD-ROM controller 1 unit 0
|
0 = /dev/sbpcd4 Panasonic CD-ROM controller 1 unit 0
|
||||||
|
@ -872,7 +869,7 @@ Your cooperation is appreciated.
|
||||||
and "user level packet I/O." This board is also
|
and "user level packet I/O." This board is also
|
||||||
accessible as a standard networking "eth" device.
|
accessible as a standard networking "eth" device.
|
||||||
|
|
||||||
38 block Reserved for Linux/AP+
|
38 block OBSOLETE (was Linux/AP+)
|
||||||
|
|
||||||
39 char ML-16P experimental I/O board
|
39 char ML-16P experimental I/O board
|
||||||
0 = /dev/ml16pa-a0 First card, first analog channel
|
0 = /dev/ml16pa-a0 First card, first analog channel
|
||||||
|
@ -892,29 +889,16 @@ Your cooperation is appreciated.
|
||||||
50 = /dev/ml16pb-c1 Second card, second counter/timer
|
50 = /dev/ml16pb-c1 Second card, second counter/timer
|
||||||
51 = /dev/ml16pb-c2 Second card, third counter/timer
|
51 = /dev/ml16pb-c2 Second card, third counter/timer
|
||||||
...
|
...
|
||||||
39 block Reserved for Linux/AP+
|
39 block
|
||||||
|
|
||||||
40 char Matrox Meteor frame grabber {2.6}
|
40 char
|
||||||
0 = /dev/mmetfgrab Matrox Meteor frame grabber
|
|
||||||
|
|
||||||
40 block Syquest EZ135 parallel port removable drive
|
40 block
|
||||||
0 = /dev/eza Parallel EZ135 drive, whole disk
|
|
||||||
|
|
||||||
This device is obsolete and will be removed in a
|
|
||||||
future version of Linux. It has been replaced with
|
|
||||||
the parallel port IDE disk driver at major number 45.
|
|
||||||
Partitions are handled in the same way as IDE disks
|
|
||||||
(see major number 3).
|
|
||||||
|
|
||||||
41 char Yet Another Micro Monitor
|
41 char Yet Another Micro Monitor
|
||||||
0 = /dev/yamm Yet Another Micro Monitor
|
0 = /dev/yamm Yet Another Micro Monitor
|
||||||
|
|
||||||
41 block MicroSolutions BackPack parallel port CD-ROM
|
41 block
|
||||||
0 = /dev/bpcd BackPack CD-ROM
|
|
||||||
|
|
||||||
This device is obsolete and will be removed in a
|
|
||||||
future version of Linux. It has been replaced with
|
|
||||||
the parallel port ATAPI CD-ROM driver at major number 46.
|
|
||||||
|
|
||||||
42 char Demo/sample use
|
42 char Demo/sample use
|
||||||
|
|
||||||
|
@ -1681,13 +1665,7 @@ Your cooperation is appreciated.
|
||||||
disks (see major number 3) except that the limit on
|
disks (see major number 3) except that the limit on
|
||||||
partitions is 15.
|
partitions is 15.
|
||||||
|
|
||||||
93 char IBM Smart Capture Card frame grabber {2.6}
|
93 char
|
||||||
0 = /dev/iscc0 First Smart Capture Card
|
|
||||||
1 = /dev/iscc1 Second Smart Capture Card
|
|
||||||
...
|
|
||||||
128 = /dev/isccctl0 First Smart Capture Card control
|
|
||||||
129 = /dev/isccctl1 Second Smart Capture Card control
|
|
||||||
...
|
|
||||||
|
|
||||||
93 block NAND Flash Translation Layer filesystem
|
93 block NAND Flash Translation Layer filesystem
|
||||||
0 = /dev/nftla First NFTL layer
|
0 = /dev/nftla First NFTL layer
|
||||||
|
@ -1695,10 +1673,7 @@ Your cooperation is appreciated.
|
||||||
...
|
...
|
||||||
240 = /dev/nftlp 16th NFTL layer
|
240 = /dev/nftlp 16th NFTL layer
|
||||||
|
|
||||||
94 char miroVIDEO DC10/30 capture/playback device {2.6}
|
94 char
|
||||||
0 = /dev/dcxx0 First capture card
|
|
||||||
1 = /dev/dcxx1 Second capture card
|
|
||||||
...
|
|
||||||
|
|
||||||
94 block IBM S/390 DASD block storage
|
94 block IBM S/390 DASD block storage
|
||||||
0 = /dev/dasda First DASD device, major
|
0 = /dev/dasda First DASD device, major
|
||||||
|
@ -1791,11 +1766,7 @@ Your cooperation is appreciated.
|
||||||
...
|
...
|
||||||
15 = /dev/amiraid/ar?p15 15th partition
|
15 = /dev/amiraid/ar?p15 15th partition
|
||||||
|
|
||||||
102 char Philips SAA5249 Teletext signal decoder {2.6}
|
102 char
|
||||||
0 = /dev/tlk0 First Teletext decoder
|
|
||||||
1 = /dev/tlk1 Second Teletext decoder
|
|
||||||
2 = /dev/tlk2 Third Teletext decoder
|
|
||||||
3 = /dev/tlk3 Fourth Teletext decoder
|
|
||||||
|
|
||||||
102 block Compressed block device
|
102 block Compressed block device
|
||||||
0 = /dev/cbd/a First compressed block device, whole device
|
0 = /dev/cbd/a First compressed block device, whole device
|
||||||
|
@ -1916,10 +1887,7 @@ Your cooperation is appreciated.
|
||||||
DAC960 (see major number 48) except that the limit on
|
DAC960 (see major number 48) except that the limit on
|
||||||
partitions is 15.
|
partitions is 15.
|
||||||
|
|
||||||
111 char Philips SAA7146-based audio/video card {2.6}
|
111 char
|
||||||
0 = /dev/av0 First A/V card
|
|
||||||
1 = /dev/av1 Second A/V card
|
|
||||||
...
|
|
||||||
|
|
||||||
111 block Compaq Next Generation Drive Array, eighth controller
|
111 block Compaq Next Generation Drive Array, eighth controller
|
||||||
0 = /dev/cciss/c7d0 First logical drive, whole disk
|
0 = /dev/cciss/c7d0 First logical drive, whole disk
|
||||||
|
@ -2079,8 +2047,8 @@ Your cooperation is appreciated.
|
||||||
...
|
...
|
||||||
|
|
||||||
119 char VMware virtual network control
|
119 char VMware virtual network control
|
||||||
0 = /dev/vmnet0 1st virtual network
|
0 = /dev/vnet0 1st virtual network
|
||||||
1 = /dev/vmnet1 2nd virtual network
|
1 = /dev/vnet1 2nd virtual network
|
||||||
...
|
...
|
||||||
|
|
||||||
120-127 char LOCAL/EXPERIMENTAL USE
|
120-127 char LOCAL/EXPERIMENTAL USE
|
||||||
|
@ -2450,7 +2418,7 @@ Your cooperation is appreciated.
|
||||||
2 = /dev/raw/raw2 Second raw I/O device
|
2 = /dev/raw/raw2 Second raw I/O device
|
||||||
...
|
...
|
||||||
|
|
||||||
163 char UNASSIGNED (was Radio Tech BIM-XXX-RS232 radio modem - see 51)
|
163 char
|
||||||
|
|
||||||
164 char Chase Research AT/PCI-Fast serial card
|
164 char Chase Research AT/PCI-Fast serial card
|
||||||
0 = /dev/ttyCH0 AT/PCI-Fast board 0, port 0
|
0 = /dev/ttyCH0 AT/PCI-Fast board 0, port 0
|
||||||
|
@ -2542,6 +2510,12 @@ Your cooperation is appreciated.
|
||||||
1 = /dev/clanvi1 Second cLAN adapter
|
1 = /dev/clanvi1 Second cLAN adapter
|
||||||
...
|
...
|
||||||
|
|
||||||
|
179 block MMC block devices
|
||||||
|
0 = /dev/mmcblk0 First SD/MMC card
|
||||||
|
1 = /dev/mmcblk0p1 First partition on first MMC card
|
||||||
|
8 = /dev/mmcblk1 Second SD/MMC card
|
||||||
|
...
|
||||||
|
|
||||||
179 char CCube DVXChip-based PCI products
|
179 char CCube DVXChip-based PCI products
|
||||||
0 = /dev/dvxirq0 First DVX device
|
0 = /dev/dvxirq0 First DVX device
|
||||||
1 = /dev/dvxirq1 Second DVX device
|
1 = /dev/dvxirq1 Second DVX device
|
||||||
|
@ -2560,6 +2534,9 @@ Your cooperation is appreciated.
|
||||||
96 = /dev/usb/hiddev0 1st USB HID device
|
96 = /dev/usb/hiddev0 1st USB HID device
|
||||||
...
|
...
|
||||||
111 = /dev/usb/hiddev15 16th USB HID device
|
111 = /dev/usb/hiddev15 16th USB HID device
|
||||||
|
112 = /dev/usb/auer0 1st auerswald ISDN device
|
||||||
|
...
|
||||||
|
127 = /dev/usb/auer15 16th auerswald ISDN device
|
||||||
128 = /dev/usb/brlvgr0 First Braille Voyager device
|
128 = /dev/usb/brlvgr0 First Braille Voyager device
|
||||||
...
|
...
|
||||||
131 = /dev/usb/brlvgr3 Fourth Braille Voyager device
|
131 = /dev/usb/brlvgr3 Fourth Braille Voyager device
|
||||||
|
@ -2810,6 +2787,20 @@ Your cooperation is appreciated.
|
||||||
...
|
...
|
||||||
190 = /dev/ttyUL3 Xilinx uartlite - port 3
|
190 = /dev/ttyUL3 Xilinx uartlite - port 3
|
||||||
191 = /dev/xvc0 Xen virtual console - port 0
|
191 = /dev/xvc0 Xen virtual console - port 0
|
||||||
|
192 = /dev/ttyPZ0 pmac_zilog - port 0
|
||||||
|
...
|
||||||
|
195 = /dev/ttyPZ3 pmac_zilog - port 3
|
||||||
|
196 = /dev/ttyTX0 TX39/49 serial port 0
|
||||||
|
...
|
||||||
|
204 = /dev/ttyTX7 TX39/49 serial port 7
|
||||||
|
205 = /dev/ttySC0 SC26xx serial port 0
|
||||||
|
206 = /dev/ttySC1 SC26xx serial port 1
|
||||||
|
207 = /dev/ttySC2 SC26xx serial port 2
|
||||||
|
208 = /dev/ttySC3 SC26xx serial port 3
|
||||||
|
209 = /dev/ttyMAX0 MAX3100 serial port 0
|
||||||
|
210 = /dev/ttyMAX1 MAX3100 serial port 1
|
||||||
|
211 = /dev/ttyMAX2 MAX3100 serial port 2
|
||||||
|
212 = /dev/ttyMAX3 MAX3100 serial port 3
|
||||||
|
|
||||||
205 char Low-density serial ports (alternate device)
|
205 char Low-density serial ports (alternate device)
|
||||||
0 = /dev/culu0 Callout device for ttyLU0
|
0 = /dev/culu0 Callout device for ttyLU0
|
||||||
|
@ -3145,6 +3136,14 @@ Your cooperation is appreciated.
|
||||||
1 = /dev/blockrom1 Second ROM card's translation layer interface
|
1 = /dev/blockrom1 Second ROM card's translation layer interface
|
||||||
...
|
...
|
||||||
|
|
||||||
|
259 block Block Extended Major
|
||||||
|
Used dynamically to hold additional partition minor
|
||||||
|
numbers and allow large numbers of partitions per device
|
||||||
|
|
||||||
|
259 char FPGA configuration interfaces
|
||||||
|
0 = /dev/icap0 First Xilinx internal configuration
|
||||||
|
1 = /dev/icap1 Second Xilinx internal configuration
|
||||||
|
|
||||||
260 char OSD (Object-based-device) SCSI Device
|
260 char OSD (Object-based-device) SCSI Device
|
||||||
0 = /dev/osd0 First OSD Device
|
0 = /dev/osd0 First OSD Device
|
||||||
1 = /dev/osd1 Second OSD Device
|
1 = /dev/osd1 Second OSD Device
|
||||||
|
|
|
@ -59,7 +59,8 @@ Accepted options:
|
||||||
ypan Enable display panning using the VESA protected mode
|
ypan Enable display panning using the VESA protected mode
|
||||||
interface. The visible screen is just a window of the
|
interface. The visible screen is just a window of the
|
||||||
video memory, console scrolling is done by changing the
|
video memory, console scrolling is done by changing the
|
||||||
start of the window. Available on x86 only.
|
start of the window. This option is available on x86
|
||||||
|
only and is the default option on that architecture.
|
||||||
|
|
||||||
ywrap Same as ypan, but assumes your gfx board can wrap-around
|
ywrap Same as ypan, but assumes your gfx board can wrap-around
|
||||||
the video memory (i.e. starts reading from top if it
|
the video memory (i.e. starts reading from top if it
|
||||||
|
@ -67,7 +68,7 @@ ywrap Same as ypan, but assumes your gfx board can wrap-around
|
||||||
Available on x86 only.
|
Available on x86 only.
|
||||||
|
|
||||||
redraw Scroll by redrawing the affected part of the screen, this
|
redraw Scroll by redrawing the affected part of the screen, this
|
||||||
is the safe (and slow) default.
|
is the default on non-x86.
|
||||||
|
|
||||||
(If you're using uvesafb as a module, the above three options are
|
(If you're using uvesafb as a module, the above three options are
|
||||||
used as a parameter of the scroll option, e.g. scroll=ypan.)
|
used as a parameter of the scroll option, e.g. scroll=ypan.)
|
||||||
|
@ -182,7 +183,7 @@ from the Video BIOS if you set pixclock to 0 in fb_var_screeninfo.
|
||||||
|
|
||||||
--
|
--
|
||||||
Michal Januszewski <spock@gentoo.org>
|
Michal Januszewski <spock@gentoo.org>
|
||||||
Last updated: 2007-06-16
|
Last updated: 2009-03-30
|
||||||
|
|
||||||
Documentation of the uvesafb options is loosely based on vesafb.txt.
|
Documentation of the uvesafb options is loosely based on vesafb.txt.
|
||||||
|
|
||||||
|
|
|
@ -255,6 +255,16 @@ Who: Jan Engelhardt <jengelh@computergmbh.de>
|
||||||
|
|
||||||
---------------------------
|
---------------------------
|
||||||
|
|
||||||
|
What: GPIO autorequest on gpio_direction_{input,output}() in gpiolib
|
||||||
|
When: February 2010
|
||||||
|
Why: All callers should use explicit gpio_request()/gpio_free().
|
||||||
|
The autorequest mechanism in gpiolib was provided mostly as a
|
||||||
|
migration aid for legacy GPIO interfaces (for SOC based GPIOs).
|
||||||
|
Those users have now largely migrated. Platforms implementing
|
||||||
|
the GPIO interfaces without using gpiolib will see no changes.
|
||||||
|
Who: David Brownell <dbrownell@users.sourceforge.net>
|
||||||
|
---------------------------
|
||||||
|
|
||||||
What: b43 support for firmware revision < 410
|
What: b43 support for firmware revision < 410
|
||||||
When: The schedule was July 2008, but it was decided that we are going to keep the
|
When: The schedule was July 2008, but it was decided that we are going to keep the
|
||||||
code as long as there are no major maintenance headaches.
|
code as long as there are no major maintenance headaches.
|
||||||
|
@ -273,13 +283,6 @@ Who: Glauber Costa <gcosta@redhat.com>
|
||||||
|
|
||||||
---------------------------
|
---------------------------
|
||||||
|
|
||||||
What: remove HID compat support
|
|
||||||
When: 2.6.29
|
|
||||||
Why: needed only as a temporary solution until distros fix themselves up
|
|
||||||
Who: Jiri Slaby <jirislaby@gmail.com>
|
|
||||||
|
|
||||||
---------------------------
|
|
||||||
|
|
||||||
What: print_fn_descriptor_symbol()
|
What: print_fn_descriptor_symbol()
|
||||||
When: October 2009
|
When: October 2009
|
||||||
Why: The %pF vsprintf format provides the same functionality in a
|
Why: The %pF vsprintf format provides the same functionality in a
|
||||||
|
@ -351,7 +354,8 @@ Who: Krzysztof Piotr Oledzki <ole@ans.pl>
|
||||||
|
|
||||||
---------------------------
|
---------------------------
|
||||||
|
|
||||||
What: i2c_attach_client(), i2c_detach_client(), i2c_driver->detach_client()
|
What: i2c_attach_client(), i2c_detach_client(), i2c_driver->detach_client(),
|
||||||
|
i2c_adapter->client_register(), i2c_adapter->client_unregister
|
||||||
When: 2.6.30
|
When: 2.6.30
|
||||||
Check: i2c_attach_client i2c_detach_client
|
Check: i2c_attach_client i2c_detach_client
|
||||||
Why: Deprecated by the new (standard) device driver binding model. Use
|
Why: Deprecated by the new (standard) device driver binding model. Use
|
||||||
|
|
|
@ -68,6 +68,8 @@ ncpfs.txt
|
||||||
- info on Novell Netware(tm) filesystem using NCP protocol.
|
- info on Novell Netware(tm) filesystem using NCP protocol.
|
||||||
nfsroot.txt
|
nfsroot.txt
|
||||||
- short guide on setting up a diskless box with NFS root filesystem.
|
- short guide on setting up a diskless box with NFS root filesystem.
|
||||||
|
nilfs2.txt
|
||||||
|
- info and mount options for the NILFS2 filesystem.
|
||||||
ntfs.txt
|
ntfs.txt
|
||||||
- info and mount options for the NTFS filesystem (Windows NT).
|
- info and mount options for the NTFS filesystem (Windows NT).
|
||||||
ocfs2.txt
|
ocfs2.txt
|
||||||
|
|
658
Documentation/filesystems/caching/backend-api.txt
Normal file
658
Documentation/filesystems/caching/backend-api.txt
Normal file
|
@ -0,0 +1,658 @@
|
||||||
|
==========================
|
||||||
|
FS-CACHE CACHE BACKEND API
|
||||||
|
==========================
|
||||||
|
|
||||||
|
The FS-Cache system provides an API by which actual caches can be supplied to
|
||||||
|
FS-Cache for it to then serve out to network filesystems and other interested
|
||||||
|
parties.
|
||||||
|
|
||||||
|
This API is declared in <linux/fscache-cache.h>.
|
||||||
|
|
||||||
|
|
||||||
|
====================================
|
||||||
|
INITIALISING AND REGISTERING A CACHE
|
||||||
|
====================================
|
||||||
|
|
||||||
|
To start off, a cache definition must be initialised and registered for each
|
||||||
|
cache the backend wants to make available. For instance, CacheFS does this in
|
||||||
|
the fill_super() operation on mounting.
|
||||||
|
|
||||||
|
The cache definition (struct fscache_cache) should be initialised by calling:
|
||||||
|
|
||||||
|
void fscache_init_cache(struct fscache_cache *cache,
|
||||||
|
struct fscache_cache_ops *ops,
|
||||||
|
const char *idfmt,
|
||||||
|
...);
|
||||||
|
|
||||||
|
Where:
|
||||||
|
|
||||||
|
(*) "cache" is a pointer to the cache definition;
|
||||||
|
|
||||||
|
(*) "ops" is a pointer to the table of operations that the backend supports on
|
||||||
|
this cache; and
|
||||||
|
|
||||||
|
(*) "idfmt" is a format and printf-style arguments for constructing a label
|
||||||
|
for the cache.
|
||||||
|
|
||||||
|
|
||||||
|
The cache should then be registered with FS-Cache by passing a pointer to the
|
||||||
|
previously initialised cache definition to:
|
||||||
|
|
||||||
|
int fscache_add_cache(struct fscache_cache *cache,
|
||||||
|
struct fscache_object *fsdef,
|
||||||
|
const char *tagname);
|
||||||
|
|
||||||
|
Two extra arguments should also be supplied:
|
||||||
|
|
||||||
|
(*) "fsdef" which should point to the object representation for the FS-Cache
|
||||||
|
master index in this cache. Netfs primary index entries will be created
|
||||||
|
here. FS-Cache keeps the caller's reference to the index object if
|
||||||
|
successful and will release it upon withdrawal of the cache.
|
||||||
|
|
||||||
|
(*) "tagname" which, if given, should be a text string naming this cache. If
|
||||||
|
this is NULL, the identifier will be used instead. For CacheFS, the
|
||||||
|
identifier is set to name the underlying block device and the tag can be
|
||||||
|
supplied by mount.
|
||||||
|
|
||||||
|
This function may return -ENOMEM if it ran out of memory or -EEXIST if the tag
|
||||||
|
is already in use. 0 will be returned on success.
|
||||||
|
|
||||||
|
|
||||||
|
=====================
|
||||||
|
UNREGISTERING A CACHE
|
||||||
|
=====================
|
||||||
|
|
||||||
|
A cache can be withdrawn from the system by calling this function with a
|
||||||
|
pointer to the cache definition:
|
||||||
|
|
||||||
|
void fscache_withdraw_cache(struct fscache_cache *cache);
|
||||||
|
|
||||||
|
In CacheFS's case, this is called by put_super().
|
||||||
|
|
||||||
|
|
||||||
|
========
|
||||||
|
SECURITY
|
||||||
|
========
|
||||||
|
|
||||||
|
The cache methods are executed in one of two contexts:
|
||||||
|
|
||||||
|
(1) that of the userspace process that issued the netfs operation that caused
|
||||||
|
the cache method to be invoked, or
|
||||||
|
|
||||||
|
(2) that of one of the processes in the FS-Cache thread pool.
|
||||||
|
|
||||||
|
In either case, this may not be an appropriate context in which to access the
|
||||||
|
cache.
|
||||||
|
|
||||||
|
The calling process's fsuid, fsgid and SELinux security identities may need to
|
||||||
|
be masqueraded for the duration of the cache driver's access to the cache.
|
||||||
|
This is left to the cache to handle; FS-Cache makes no effort in this regard.
|
||||||
|
|
||||||
|
|
||||||
|
===================================
|
||||||
|
CONTROL AND STATISTICS PRESENTATION
|
||||||
|
===================================
|
||||||
|
|
||||||
|
The cache may present data to the outside world through FS-Cache's interfaces
|
||||||
|
in sysfs and procfs - the former for control and the latter for statistics.
|
||||||
|
|
||||||
|
A sysfs directory called /sys/fs/fscache/<cachetag>/ is created if CONFIG_SYSFS
|
||||||
|
is enabled. This is accessible through the kobject struct fscache_cache::kobj
|
||||||
|
and is for use by the cache as it sees fit.
|
||||||
|
|
||||||
|
|
||||||
|
========================
|
||||||
|
RELEVANT DATA STRUCTURES
|
||||||
|
========================
|
||||||
|
|
||||||
|
(*) Index/Data file FS-Cache representation cookie:
|
||||||
|
|
||||||
|
struct fscache_cookie {
|
||||||
|
struct fscache_object_def *def;
|
||||||
|
struct fscache_netfs *netfs;
|
||||||
|
void *netfs_data;
|
||||||
|
...
|
||||||
|
};
|
||||||
|
|
||||||
|
The fields that might be of use to the backend describe the object
|
||||||
|
definition, the netfs definition and the netfs's data for this cookie.
|
||||||
|
The object definition contains functions supplied by the netfs for loading
|
||||||
|
and matching index entries; these are required to provide some of the
|
||||||
|
cache operations.
|
||||||
|
|
||||||
|
|
||||||
|
(*) In-cache object representation:
|
||||||
|
|
||||||
|
struct fscache_object {
|
||||||
|
int debug_id;
|
||||||
|
enum {
|
||||||
|
FSCACHE_OBJECT_RECYCLING,
|
||||||
|
...
|
||||||
|
} state;
|
||||||
|
spinlock_t lock;
|
||||||
|
struct fscache_cache *cache;
|
||||||
|
struct fscache_cookie *cookie;
|
||||||
|
...
|
||||||
|
};
|
||||||
|
|
||||||
|
Structures of this type should be allocated by the cache backend and
|
||||||
|
passed to FS-Cache when requested by the appropriate cache operation. In
|
||||||
|
the case of CacheFS, they're embedded in CacheFS's internal object
|
||||||
|
structures.
|
||||||
|
|
||||||
|
The debug_id is a simple integer that can be used in debugging messages
|
||||||
|
that refer to a particular object. In such a case it should be printed
|
||||||
|
using "OBJ%x" to be consistent with FS-Cache.
|
||||||
|
|
||||||
|
Each object contains a pointer to the cookie that represents the object it
|
||||||
|
is backing. An object should be retired when put_object() is called if it is
|
||||||
|
in state FSCACHE_OBJECT_RECYCLING. The fscache_object struct should be
|
||||||
|
initialised by calling fscache_object_init(object).
|
||||||
|
|
||||||
|
|
||||||
|
(*) FS-Cache operation record:
|
||||||
|
|
||||||
|
struct fscache_operation {
|
||||||
|
atomic_t usage;
|
||||||
|
struct fscache_object *object;
|
||||||
|
unsigned long flags;
|
||||||
|
#define FSCACHE_OP_EXCLUSIVE
|
||||||
|
void (*processor)(struct fscache_operation *op);
|
||||||
|
void (*release)(struct fscache_operation *op);
|
||||||
|
...
|
||||||
|
};
|
||||||
|
|
||||||
|
FS-Cache has a pool of threads that it uses to give CPU time to the
|
||||||
|
various asynchronous operations that need to be done as part of driving
|
||||||
|
the cache. These are represented by the above structure. The processor
|
||||||
|
method is called to give the op CPU time, and the release method to get
|
||||||
|
rid of it when its usage count reaches 0.
|
||||||
|
|
||||||
|
An operation can be made exclusive upon an object by setting the
|
||||||
|
appropriate flag before enqueuing it with fscache_enqueue_operation(). If
|
||||||
|
an operation needs more processing time, it should be enqueued again.
|
||||||
|
|
||||||
|
|
||||||
|
(*) FS-Cache retrieval operation record:
|
||||||
|
|
||||||
|
struct fscache_retrieval {
|
||||||
|
struct fscache_operation op;
|
||||||
|
struct address_space *mapping;
|
||||||
|
struct list_head *to_do;
|
||||||
|
...
|
||||||
|
};
|
||||||
|
|
||||||
|
A structure of this type is allocated by FS-Cache to record retrieval and
|
||||||
|
allocation requests made by the netfs. This struct is then passed to the
|
||||||
|
backend to do the operation. The backend may get extra refs to it by
|
||||||
|
calling fscache_get_retrieval() and refs may be discarded by calling
|
||||||
|
fscache_put_retrieval().
|
||||||
|
|
||||||
|
A retrieval operation can be used by the backend to do retrieval work. To
|
||||||
|
do this, the retrieval->op.processor method pointer should be set
|
||||||
|
appropriately by the backend and fscache_enqueue_retrieval() called to
|
||||||
|
submit it to the thread pool. CacheFiles, for example, uses this to queue
|
||||||
|
page examination when it detects PG_lock being cleared.
|
||||||
|
|
||||||
|
The to_do field is an empty list available for the cache backend to use as
|
||||||
|
it sees fit.
|
||||||
|
|
||||||
|
|
||||||
|
(*) FS-Cache storage operation record:
|
||||||
|
|
||||||
|
struct fscache_storage {
|
||||||
|
struct fscache_operation op;
|
||||||
|
pgoff_t store_limit;
|
||||||
|
...
|
||||||
|
};
|
||||||
|
|
||||||
|
A structure of this type is allocated by FS-Cache to record outstanding
|
||||||
|
writes to be made. FS-Cache itself enqueues this operation and invokes
|
||||||
|
the write_page() method on the object at appropriate times to effect
|
||||||
|
storage.
|
||||||
|
|
||||||
|
|
||||||
|
================
|
||||||
|
CACHE OPERATIONS
|
||||||
|
================
|
||||||
|
|
||||||
|
The cache backend provides FS-Cache with a table of operations that can be
|
||||||
|
performed on the denizens of the cache. These are held in a structure of type:
|
||||||
|
|
||||||
|
struct fscache_cache_ops
|
||||||
|
|
||||||
|
(*) Name of cache provider [mandatory]:
|
||||||
|
|
||||||
|
const char *name
|
||||||
|
|
||||||
|
This isn't strictly an operation, but should be pointed at a string naming
|
||||||
|
the backend.
|
||||||
|
|
||||||
|
|
||||||
|
(*) Allocate a new object [mandatory]:
|
||||||
|
|
||||||
|
struct fscache_object *(*alloc_object)(struct fscache_cache *cache,
|
||||||
|
struct fscache_cookie *cookie)
|
||||||
|
|
||||||
|
This method is used to allocate a cache object representation to back a
|
||||||
|
cookie in a particular cache. fscache_object_init() should be called on
|
||||||
|
the object to initialise it prior to returning.
|
||||||
|
|
||||||
|
This function may also be used to parse the index key to be used for
|
||||||
|
multiple lookup calls to turn it into a more convenient form. FS-Cache
|
||||||
|
will call the lookup_complete() method to allow the cache to release the
|
||||||
|
form once lookup is complete or aborted.
|
||||||
|
|
||||||
|
|
||||||
|
(*) Look up and create object [mandatory]:
|
||||||
|
|
||||||
|
void (*lookup_object)(struct fscache_object *object)
|
||||||
|
|
||||||
|
This method is used to look up an object, given that the object is already
|
||||||
|
allocated and attached to the cookie. This should instantiate that object
|
||||||
|
in the cache if it can.
|
||||||
|
|
||||||
|
The method should call fscache_object_lookup_negative() as soon as
|
||||||
|
possible if it determines the object doesn't exist in the cache. If the
|
||||||
|
object is found to exist and the netfs indicates that it is valid then
|
||||||
|
fscache_obtained_object() should be called once the object is in a
|
||||||
|
position to have data stored in it. Similarly, fscache_obtained_object()
|
||||||
|
should also be called once a non-present object has been created.
|
||||||
|
|
||||||
|
If a lookup error occurs, fscache_object_lookup_error() should be called
|
||||||
|
to abort the lookup of that object.
|
||||||
|
|
||||||
|
|
||||||
|
(*) Release lookup data [mandatory]:
|
||||||
|
|
||||||
|
void (*lookup_complete)(struct fscache_object *object)
|
||||||
|
|
||||||
|
This method is called to ask the cache to release any resources it was
|
||||||
|
using to perform a lookup.
|
||||||
|
|
||||||
|
|
||||||
|
(*) Increment object refcount [mandatory]:
|
||||||
|
|
||||||
|
struct fscache_object *(*grab_object)(struct fscache_object *object)
|
||||||
|
|
||||||
|
This method is called to increment the reference count on an object. It
|
||||||
|
may fail (for instance if the cache is being withdrawn) by returning NULL.
|
||||||
|
It should return the object pointer if successful.
|
||||||
|
|
||||||
|
|
||||||
|
(*) Lock/Unlock object [mandatory]:
|
||||||
|
|
||||||
|
void (*lock_object)(struct fscache_object *object)
|
||||||
|
void (*unlock_object)(struct fscache_object *object)
|
||||||
|
|
||||||
|
These methods are used to exclusively lock an object. It must be possible
|
||||||
|
to schedule with the lock held, so a spinlock isn't sufficient.
|
||||||
|
|
||||||
|
|
||||||
|
(*) Pin/Unpin object [optional]:
|
||||||
|
|
||||||
|
int (*pin_object)(struct fscache_object *object)
|
||||||
|
void (*unpin_object)(struct fscache_object *object)
|
||||||
|
|
||||||
|
These methods are used to pin an object into the cache. Once pinned an
|
||||||
|
object cannot be reclaimed to make space. Return -ENOSPC if there's not
|
||||||
|
enough space in the cache to permit this.
|
||||||
|
|
||||||
|
|
||||||
|
(*) Update object [mandatory]:
|
||||||
|
|
||||||
|
int (*update_object)(struct fscache_object *object)
|
||||||
|
|
||||||
|
This is called to update the index entry for the specified object. The
|
||||||
|
new information should be in object->cookie->netfs_data. This can be
|
||||||
|
obtained by calling object->cookie->def->get_aux()/get_attr().
|
||||||
|
|
||||||
|
|
||||||
|
(*) Discard object [mandatory]:
|
||||||
|
|
||||||
|
void (*drop_object)(struct fscache_object *object)
|
||||||
|
|
||||||
|
This method is called to indicate that an object has been unbound from its
|
||||||
|
cookie, and that the cache should release the object's resources and
|
||||||
|
retire it if it's in state FSCACHE_OBJECT_RECYCLING.
|
||||||
|
|
||||||
|
This method should not attempt to release any references held by the
|
||||||
|
caller. The caller will invoke the put_object() method as appropriate.
|
||||||
|
|
||||||
|
|
||||||
|
(*) Release object reference [mandatory]:
|
||||||
|
|
||||||
|
void (*put_object)(struct fscache_object *object)
|
||||||
|
|
||||||
|
This method is used to discard a reference to an object. The object may
|
||||||
|
be freed when all the references to it are released.
|
||||||
|
|
||||||
|
|
||||||
|
(*) Synchronise a cache [mandatory]:
|
||||||
|
|
||||||
|
void (*sync)(struct fscache_cache *cache)
|
||||||
|
|
||||||
|
This is called to ask the backend to synchronise a cache with its backing
|
||||||
|
device.
|
||||||
|
|
||||||
|
|
||||||
|
(*) Dissociate a cache [mandatory]:
|
||||||
|
|
||||||
|
void (*dissociate_pages)(struct fscache_cache *cache)
|
||||||
|
|
||||||
|
This is called to ask a cache to perform any page dissociations as part of
|
||||||
|
cache withdrawal.
|
||||||
|
|
||||||
|
|
||||||
|
(*) Notification that the attributes on a netfs file changed [mandatory]:
|
||||||
|
|
||||||
|
int (*attr_changed)(struct fscache_object *object);
|
||||||
|
|
||||||
|
This is called to indicate to the cache that certain attributes on a netfs
|
||||||
|
file have changed (for example the maximum size a file may reach). The
|
||||||
|
cache can read these from the netfs by calling the cookie's get_attr()
|
||||||
|
method.
|
||||||
|
|
||||||
|
The cache may use the file size information to reserve space on the cache.
|
||||||
|
It should also call fscache_set_store_limit() to indicate to FS-Cache the
|
||||||
|
highest byte it's willing to store for an object.
|
||||||
|
|
||||||
|
This method may return -ve if an error occurred or the cache object cannot
|
||||||
|
be expanded. In such a case, the object will be withdrawn from service.
|
||||||
|
|
||||||
|
This operation is run asynchronously from FS-Cache's thread pool, and
|
||||||
|
storage and retrieval operations from the netfs are excluded during the
|
||||||
|
execution of this operation.
|
||||||
|
|
||||||
|
|
||||||
|
(*) Reserve cache space for an object's data [optional]:
|
||||||
|
|
||||||
|
int (*reserve_space)(struct fscache_object *object, loff_t size);
|
||||||
|
|
||||||
|
This is called to request that cache space be reserved to hold the data
|
||||||
|
for an object and the metadata used to track it. Zero size should be
|
||||||
|
taken as a request to cancel a reservation.
|
||||||
|
|
||||||
|
This should return 0 if successful, -ENOSPC if there isn't enough space
|
||||||
|
available, or -ENOMEM or -EIO on other errors.
|
||||||
|
|
||||||
|
The reservation may exceed the current size of the object, thus permitting
|
||||||
|
future expansion. If the amount of space consumed by an object would
|
||||||
|
exceed the reservation, it's permitted to refuse requests to allocate
|
||||||
|
pages, but not required. An object may be pruned down to its reservation
|
||||||
|
size if larger than that already.
|
||||||
|
|
||||||
|
|
||||||
|
(*) Request page be read from cache [mandatory]:
|
||||||
|
|
||||||
|
int (*read_or_alloc_page)(struct fscache_retrieval *op,
|
||||||
|
struct page *page,
|
||||||
|
gfp_t gfp)
|
||||||
|
|
||||||
|
This is called to attempt to read a netfs page from the cache, or to
|
||||||
|
reserve a backing block if not. FS-Cache will have done as much checking
|
||||||
|
as it can before calling, but most of the work belongs to the backend.
|
||||||
|
|
||||||
|
If there's no page in the cache, then -ENODATA should be returned if the
|
||||||
|
backend managed to reserve a backing block; -ENOBUFS or -ENOMEM if it
|
||||||
|
didn't.
|
||||||
|
|
||||||
|
If there is suitable data in the cache, then a read operation should be
|
||||||
|
queued and 0 returned. When the read finishes, fscache_end_io() should be
|
||||||
|
called.
|
||||||
|
|
||||||
|
fscache_mark_pages_cached() should be called for the page if any cache
|
||||||
|
metadata is retained. This will indicate to the netfs that the page needs
|
||||||
|
explicit uncaching. This operation takes a pagevec, thus allowing several
|
||||||
|
pages to be marked at once.
|
||||||
|
|
||||||
|
The retrieval record pointed to by op should be retained for each page
|
||||||
|
queued and released when I/O on the page has been formally ended.
|
||||||
|
fscache_get/put_retrieval() are available for this purpose.
|
||||||
|
|
||||||
|
The retrieval record may be used to get CPU time via the FS-Cache thread
|
||||||
|
pool. If this is desired, the op->op.processor should be set to point to
|
||||||
|
the appropriate processing routine, and fscache_enqueue_retrieval() should
|
||||||
|
be called at an appropriate point to request CPU time. For instance, the
|
||||||
|
retrieval routine could be enqueued upon the completion of a disk read.
|
||||||
|
The to_do field in the retrieval record is provided to aid in this.
|
||||||
|
|
||||||
|
If an I/O error occurs, fscache_io_error() should be called and -ENOBUFS
|
||||||
|
returned if possible or fscache_end_io() called with a suitable error
|
||||||
|
code.
|
||||||
|
|
||||||
|
|
||||||
|
(*) Request pages be read from cache [mandatory]:
|
||||||
|
|
||||||
|
int (*read_or_alloc_pages)(struct fscache_retrieval *op,
|
||||||
|
struct list_head *pages,
|
||||||
|
unsigned *nr_pages,
|
||||||
|
gfp_t gfp)
|
||||||
|
|
||||||
|
This is like the read_or_alloc_page() method, except it is handed a list
|
||||||
|
of pages instead of one page. Any pages on which a read operation is
|
||||||
|
started must be added to the page cache for the specified mapping and also
|
||||||
|
to the LRU. Such pages must also be removed from the pages list and
|
||||||
|
*nr_pages decremented per page.
|
||||||
|
|
||||||
|
If there was an error such as -ENOMEM, then that should be returned; else
|
||||||
|
if one or more pages couldn't be read or allocated, then -ENOBUFS should
|
||||||
|
be returned; else if one or more pages couldn't be read, then -ENODATA
|
||||||
|
should be returned. If all the pages are dispatched then 0 should be
|
||||||
|
returned.
|
||||||
|
|
||||||
|
|
||||||
|
(*) Request page be allocated in the cache [mandatory]:
|
||||||
|
|
||||||
|
int (*allocate_page)(struct fscache_retrieval *op,
|
||||||
|
struct page *page,
|
||||||
|
gfp_t gfp)
|
||||||
|
|
||||||
|
This is like the read_or_alloc_page() method, except that it shouldn't
|
||||||
|
read from the cache, even if there's data there that could be retrieved.
|
||||||
|
It should, however, set up any internal metadata required such that
|
||||||
|
the write_page() method can write to the cache.
|
||||||
|
|
||||||
|
If there's no backing block available, then -ENOBUFS should be returned
|
||||||
|
(or -ENOMEM if there were other problems). If a block is successfully
|
||||||
|
allocated, then the netfs page should be marked and 0 returned.
|
||||||
|
|
||||||
|
|
||||||
|
(*) Request pages be allocated in the cache [mandatory]:
|
||||||
|
|
||||||
|
int (*allocate_pages)(struct fscache_retrieval *op,
|
||||||
|
struct list_head *pages,
|
||||||
|
unsigned *nr_pages,
|
||||||
|
gfp_t gfp)
|
||||||
|
|
||||||
|
This is a multiple page version of the allocate_page() method. pages and
|
||||||
|
nr_pages should be treated as for the read_or_alloc_pages() method.
|
||||||
|
|
||||||
|
|
||||||
|
(*) Request page be written to cache [mandatory]:
|
||||||
|
|
||||||
|
int (*write_page)(struct fscache_storage *op,
|
||||||
|
struct page *page);
|
||||||
|
|
||||||
|
This is called to write from a page on which there was a previously
|
||||||
|
successful read_or_alloc_page() call or similar. FS-Cache filters out
|
||||||
|
pages that don't have mappings.
|
||||||
|
|
||||||
|
This method is called asynchronously from the FS-Cache thread pool. It is
|
||||||
|
not required to actually store anything, provided -ENODATA is then
|
||||||
|
returned to the next read of this page.
|
||||||
|
|
||||||
|
If an error occurred, then a negative error code should be returned,
|
||||||
|
otherwise zero should be returned. FS-Cache will take appropriate action
|
||||||
|
in response to an error, such as withdrawing this object.
|
||||||
|
|
||||||
|
If this method returns success then FS-Cache will inform the netfs
|
||||||
|
appropriately.
|
||||||
|
|
||||||
|
|
||||||
|
(*) Discard retained per-page metadata [mandatory]:
|
||||||
|
|
||||||
|
void (*uncache_page)(struct fscache_object *object, struct page *page)
|
||||||
|
|
||||||
|
This is called when a netfs page is being evicted from the pagecache. The
|
||||||
|
cache backend should tear down any internal representation or tracking it
|
||||||
|
maintains for this page.
|
||||||
|
|
||||||
|
|
||||||
|
==================
|
||||||
|
FS-CACHE UTILITIES
|
||||||
|
==================
|
||||||
|
|
||||||
|
FS-Cache provides some utilities that a cache backend may make use of:
|
||||||
|
|
||||||
|
(*) Note occurrence of an I/O error in a cache:
|
||||||
|
|
||||||
|
void fscache_io_error(struct fscache_cache *cache)
|
||||||
|
|
||||||
|
This tells FS-Cache that an I/O error occurred in the cache. After this
|
||||||
|
has been called, only resource dissociation operations (object and page
|
||||||
|
release) will be passed from the netfs to the cache backend for the
|
||||||
|
specified cache.
|
||||||
|
|
||||||
|
This does not actually withdraw the cache. That must be done separately.
|
||||||
|
|
||||||
|
|
||||||
|
(*) Invoke the retrieval I/O completion function:
|
||||||
|
|
||||||
|
void fscache_end_io(struct fscache_retrieval *op, struct page *page,
|
||||||
|
int error);
|
||||||
|
|
||||||
|
This is called to note the end of an attempt to retrieve a page. The
|
||||||
|
error value should be 0 if successful and an error otherwise.
|
||||||
|
|
||||||
|
|
||||||
|
(*) Set highest store limit:
|
||||||
|
|
||||||
|
void fscache_set_store_limit(struct fscache_object *object,
|
||||||
|
loff_t i_size);
|
||||||
|
|
||||||
|
This sets the limit FS-Cache imposes on the highest byte it's willing to
|
||||||
|
try and store for a netfs. Any page over this limit is automatically
|
||||||
|
rejected by fscache_read_or_alloc_page() and co with -ENOBUFS.
|
||||||
|
|
||||||
|
|
||||||
|
(*) Mark pages as being cached:
|
||||||
|
|
||||||
|
void fscache_mark_pages_cached(struct fscache_retrieval *op,
|
||||||
|
struct pagevec *pagevec);
|
||||||
|
|
||||||
|
This marks a set of pages as being cached. After this has been called,
|
||||||
|
the netfs must call fscache_uncache_page() to unmark the pages.
|
||||||
|
|
||||||
|
|
||||||
|
(*) Perform coherency check on an object:
|
||||||
|
|
||||||
|
enum fscache_checkaux fscache_check_aux(struct fscache_object *object,
|
||||||
|
const void *data,
|
||||||
|
uint16_t datalen);
|
||||||
|
|
||||||
|
This asks the netfs to perform a coherency check on an object that has
|
||||||
|
just been looked up. The cookie attached to the object will determine the
|
||||||
|
netfs to use. data and datalen should specify where the auxiliary data
|
||||||
|
retrieved from the cache can be found.
|
||||||
|
|
||||||
|
One of three values will be returned:
|
||||||
|
|
||||||
|
(*) FSCACHE_CHECKAUX_OKAY
|
||||||
|
|
||||||
|
The coherency data indicates the object is valid as is.
|
||||||
|
|
||||||
|
(*) FSCACHE_CHECKAUX_NEEDS_UPDATE
|
||||||
|
|
||||||
|
The coherency data needs updating, but otherwise the object is
|
||||||
|
valid.
|
||||||
|
|
||||||
|
(*) FSCACHE_CHECKAUX_OBSOLETE
|
||||||
|
|
||||||
|
The coherency data indicates that the object is obsolete and should
|
||||||
|
be discarded.
|
||||||
|
|
||||||
|
|
||||||
|
(*) Initialise a freshly allocated object:
|
||||||
|
|
||||||
|
void fscache_object_init(struct fscache_object *object);
|
||||||
|
|
||||||
|
This initialises all the fields in an object representation.
|
||||||
|
|
||||||
|
|
||||||
|
(*) Indicate the destruction of an object:
|
||||||
|
|
||||||
|
void fscache_object_destroyed(struct fscache_cache *cache);
|
||||||
|
|
||||||
|
This must be called to inform FS-Cache that an object that belonged to a
|
||||||
|
cache has been destroyed and deallocated. This will allow continuation
|
||||||
|
of the cache withdrawal process when it is stopped pending destruction of
|
||||||
|
all the objects.
|
||||||
|
|
||||||
|
|
||||||
|
(*) Indicate negative lookup on an object:
|
||||||
|
|
||||||
|
void fscache_object_lookup_negative(struct fscache_object *object);
|
||||||
|
|
||||||
|
This is called to indicate to FS-Cache that a lookup process for an object
|
||||||
|
found a negative result.
|
||||||
|
|
||||||
|
This changes the state of an object to permit reads pending on lookup
|
||||||
|
completion to go off and start fetching data from the netfs server as it's
|
||||||
|
known at this point that there can't be any data in the cache.
|
||||||
|
|
||||||
|
This may be called multiple times on an object. Only the first call is
|
||||||
|
significant - all subsequent calls are ignored.
|
||||||
|
|
||||||
|
|
||||||
|
(*) Indicate an object has been obtained:
|
||||||
|
|
||||||
|
void fscache_obtained_object(struct fscache_object *object);
|
||||||
|
|
||||||
|
This is called to indicate to FS-Cache that a lookup process for an object
|
||||||
|
produced a positive result, or that an object was created. This should
|
||||||
|
only be called once for any particular object.
|
||||||
|
|
||||||
|
This changes the state of an object to indicate:
|
||||||
|
|
||||||
|
(1) if no call to fscache_object_lookup_negative() has been made on
|
||||||
|
this object, that there may be data available, and that reads can
|
||||||
|
now go and look for it; and
|
||||||
|
|
||||||
|
(2) that writes may now proceed against this object.
|
||||||
|
|
||||||
|
|
||||||
|
(*) Indicate that object lookup failed:
|
||||||
|
|
||||||
|
void fscache_object_lookup_error(struct fscache_object *object);
|
||||||
|
|
||||||
|
This marks an object as having encountered a fatal error (usually EIO)
|
||||||
|
and causes it to move into a state whereby it will be withdrawn as soon
|
||||||
|
as possible.
|
||||||
|
|
||||||
|
|
||||||
|
(*) Get and release references on a retrieval record:
|
||||||
|
|
||||||
|
void fscache_get_retrieval(struct fscache_retrieval *op);
|
||||||
|
void fscache_put_retrieval(struct fscache_retrieval *op);
|
||||||
|
|
||||||
|
These two functions are used to retain a retrieval record whilst doing
|
||||||
|
asynchronous data retrieval and block allocation.
|
||||||
|
|
||||||
|
|
||||||
|
(*) Enqueue a retrieval record for processing:
|
||||||
|
|
||||||
|
void fscache_enqueue_retrieval(struct fscache_retrieval *op);
|
||||||
|
|
||||||
|
This enqueues a retrieval record for processing by the FS-Cache thread
|
||||||
|
pool. One of the threads in the pool will invoke the retrieval record's
|
||||||
|
op->op.processor callback function. This function may be called from
|
||||||
|
within the callback function.
|
||||||
|
|
||||||
|
|
||||||
|
(*) List of object state names:
|
||||||
|
|
||||||
|
const char *fscache_object_states[];
|
||||||
|
|
||||||
|
For debugging purposes, this may be used to turn the state that an object
|
||||||
|
is in into a text string for display purposes.
|
501
Documentation/filesystems/caching/cachefiles.txt
Normal file
501
Documentation/filesystems/caching/cachefiles.txt
Normal file
|
@ -0,0 +1,501 @@
|
||||||
|
===============================================
|
||||||
|
CacheFiles: CACHE ON ALREADY MOUNTED FILESYSTEM
|
||||||
|
===============================================
|
||||||
|
|
||||||
|
Contents:
|
||||||
|
|
||||||
|
(*) Overview.
|
||||||
|
|
||||||
|
(*) Requirements.
|
||||||
|
|
||||||
|
(*) Configuration.
|
||||||
|
|
||||||
|
(*) Starting the cache.
|
||||||
|
|
||||||
|
(*) Things to avoid.
|
||||||
|
|
||||||
|
(*) Cache culling.
|
||||||
|
|
||||||
|
(*) Cache structure.
|
||||||
|
|
||||||
|
(*) Security model and SELinux.
|
||||||
|
|
||||||
|
(*) A note on security.
|
||||||
|
|
||||||
|
(*) Statistical information.
|
||||||
|
|
||||||
|
(*) Debugging.
|
||||||
|
|
||||||
|
|
||||||
|
========
|
||||||
|
OVERVIEW
|
||||||
|
========
|
||||||
|
|
||||||
|
CacheFiles is a caching backend that's meant to use as a cache a directory on
|
||||||
|
an already mounted filesystem of a local type (such as Ext3).
|
||||||
|
|
||||||
|
CacheFiles uses a userspace daemon to do some of the cache management - such as
|
||||||
|
reaping stale nodes and culling. This is called cachefilesd and lives in
|
||||||
|
/sbin.
|
||||||
|
|
||||||
|
The filesystem and data integrity of the cache are only as good as those of the
|
||||||
|
filesystem providing the backing services. Note that CacheFiles does not
|
||||||
|
attempt to journal anything since the journalling interfaces of the various
|
||||||
|
filesystems are very specific in nature.
|
||||||
|
|
||||||
|
CacheFiles creates a misc character device - "/dev/cachefiles" - that is used
|
||||||
|
to communicate with the daemon. Only one thing may have this open at once,
|
||||||
|
and whilst it is open, a cache is at least partially in existence. The daemon
|
||||||
|
opens this and sends commands down it to control the cache.
|
||||||
|
|
||||||
|
CacheFiles is currently limited to a single cache.
|
||||||
|
|
||||||
|
CacheFiles attempts to maintain at least a certain percentage of free space on
|
||||||
|
the filesystem, shrinking the cache by culling the objects it contains to make
|
||||||
|
space if necessary - see the "Cache Culling" section. This means it can be
|
||||||
|
placed on the same medium as a live set of data, and will expand to make use of
|
||||||
|
spare space and automatically contract when the set of data requires more
|
||||||
|
space.
|
||||||
|
|
||||||
|
|
||||||
|
============
|
||||||
|
REQUIREMENTS
|
||||||
|
============
|
||||||
|
|
||||||
|
The use of CacheFiles and its daemon requires the following features to be
|
||||||
|
available in the system and in the cache filesystem:
|
||||||
|
|
||||||
|
- dnotify.
|
||||||
|
|
||||||
|
- extended attributes (xattrs).
|
||||||
|
|
||||||
|
- openat() and friends.
|
||||||
|
|
||||||
|
- bmap() support on files in the filesystem (FIBMAP ioctl).
|
||||||
|
|
||||||
|
- The use of bmap() to detect a partial page at the end of the file.
|
||||||
|
|
||||||
|
It is strongly recommended that the "dir_index" option is enabled on Ext3
|
||||||
|
filesystems being used as a cache.
|
||||||
|
|
||||||
|
|
||||||
|
=============
|
||||||
|
CONFIGURATION
|
||||||
|
=============
|
||||||
|
|
||||||
|
The cache is configured by a script in /etc/cachefilesd.conf. These commands
|
||||||
|
set up the cache ready for use. The following script commands are available:
|
||||||
|
|
||||||
|
(*) brun <N>%
|
||||||
|
(*) bcull <N>%
|
||||||
|
(*) bstop <N>%
|
||||||
|
(*) frun <N>%
|
||||||
|
(*) fcull <N>%
|
||||||
|
(*) fstop <N>%
|
||||||
|
|
||||||
|
Configure the culling limits. Optional. See the section on culling.
|
||||||
|
The defaults are 7% (run), 5% (cull) and 1% (stop) respectively.
|
||||||
|
|
||||||
|
The commands beginning with a 'b' are file space (block) limits, those
|
||||||
|
beginning with an 'f' are file count limits.
|
||||||
|
|
||||||
|
(*) dir <path>
|
||||||
|
|
||||||
|
Specify the directory containing the root of the cache. Mandatory.
|
||||||
|
|
||||||
|
(*) tag <name>
|
||||||
|
|
||||||
|
Specify a tag to FS-Cache to use in distinguishing multiple caches.
|
||||||
|
Optional. The default is "CacheFiles".
|
||||||
|
|
||||||
|
(*) debug <mask>
|
||||||
|
|
||||||
|
Specify a numeric bitmask to control debugging in the kernel module.
|
||||||
|
Optional. The default is zero (all off). The following values can be
|
||||||
|
OR'd into the mask to collect various information:
|
||||||
|
|
||||||
|
1 Turn on trace of function entry (_enter() macros)
|
||||||
|
2 Turn on trace of function exit (_leave() macros)
|
||||||
|
4 Turn on trace of internal debug points (_debug())
|
||||||
|
|
||||||
|
This mask can also be set through sysfs, eg:
|
||||||
|
|
||||||
|
echo 5 >/sys/module/cachefiles/parameters/debug
|
||||||
|
|
||||||
|
|
||||||
|
==================
|
||||||
|
STARTING THE CACHE
|
||||||
|
==================
|
||||||
|
|
||||||
|
The cache is started by running the daemon. The daemon opens the cache device,
|
||||||
|
configures the cache and tells it to begin caching. At that point the cache
|
||||||
|
binds to fscache and the cache becomes live.
|
||||||
|
|
||||||
|
The daemon is run as follows:
|
||||||
|
|
||||||
|
/sbin/cachefilesd [-d]* [-s] [-n] [-f <configfile>]
|
||||||
|
|
||||||
|
The flags are:
|
||||||
|
|
||||||
|
(*) -d
|
||||||
|
|
||||||
|
Increase the debugging level. This can be specified multiple times and
|
||||||
|
is cumulative with itself.
|
||||||
|
|
||||||
|
(*) -s
|
||||||
|
|
||||||
|
Send messages to stderr instead of syslog.
|
||||||
|
|
||||||
|
(*) -n
|
||||||
|
|
||||||
|
Don't daemonise and go into background.
|
||||||
|
|
||||||
|
(*) -f <configfile>
|
||||||
|
|
||||||
|
Use an alternative configuration file rather than the default one.
|
||||||
|
|
||||||
|
|
||||||
|
===============
|
||||||
|
THINGS TO AVOID
|
||||||
|
===============
|
||||||
|
|
||||||
|
Do not mount other things within the cache as this will cause problems. The
|
||||||
|
kernel module contains its own very cut-down path walking facility that ignores
|
||||||
|
mountpoints, but the daemon can't avoid them.
|
||||||
|
|
||||||
|
Do not create, rename or unlink files and directories in the cache whilst the
|
||||||
|
cache is active, as this may cause the state to become uncertain.
|
||||||
|
|
||||||
|
Renaming files in the cache might make objects appear to be other objects (the
|
||||||
|
filename is part of the lookup key).
|
||||||
|
|
||||||
|
Do not change or remove the extended attributes attached to cache files by the
|
||||||
|
cache as this will cause the cache state management to get confused.
|
||||||
|
|
||||||
|
Do not create files or directories in the cache, lest the cache get confused or
|
||||||
|
serve incorrect data.
|
||||||
|
|
||||||
|
Do not chmod files in the cache. The module creates things with minimal
|
||||||
|
permissions to prevent random users being able to access them directly.
|
||||||
|
|
||||||
|
|
||||||
|
=============
|
||||||
|
CACHE CULLING
|
||||||
|
=============
|
||||||
|
|
||||||
|
The cache may need culling occasionally to make space. This involves
|
||||||
|
discarding objects from the cache that have been used less recently than
|
||||||
|
anything else. Culling is based on the access time of data objects. Empty
|
||||||
|
directories are culled if not in use.
|
||||||
|
|
||||||
|
Cache culling is done on the basis of the percentage of blocks and the
|
||||||
|
percentage of files available in the underlying filesystem. There are six
|
||||||
|
"limits":
|
||||||
|
|
||||||
|
(*) brun
|
||||||
|
(*) frun
|
||||||
|
|
||||||
|
If the amount of free space and the number of available files in the cache
|
||||||
|
rises above both these limits, then culling is turned off.
|
||||||
|
|
||||||
|
(*) bcull
|
||||||
|
(*) fcull
|
||||||
|
|
||||||
|
If the amount of available space or the number of available files in the
|
||||||
|
cache falls below either of these limits, then culling is started.
|
||||||
|
|
||||||
|
(*) bstop
|
||||||
|
(*) fstop
|
||||||
|
|
||||||
|
If the amount of available space or the number of available files in the
|
||||||
|
cache falls below either of these limits, then no further allocation of
|
||||||
|
disk space or files is permitted until culling has raised things above
|
||||||
|
these limits again.
|
||||||
|
|
||||||
|
These must be configured thusly:
|
||||||
|
|
||||||
|
0 <= bstop < bcull < brun < 100
|
||||||
|
0 <= fstop < fcull < frun < 100
|
||||||
|
|
||||||
|
Note that these are percentages of available space and available files, and do
|
||||||
|
_not_ appear as 100 minus the percentage displayed by the "df" program.
|
||||||
|
|
||||||
|
The userspace daemon scans the cache to build up a table of cullable objects.
|
||||||
|
These are then culled in least recently used order. A new scan of the cache is
|
||||||
|
started as soon as space is made in the table. Objects will be skipped if
|
||||||
|
their atimes have changed or if the kernel module says it is still using them.
|
||||||
|
|
||||||
|
|
||||||
|
===============
|
||||||
|
CACHE STRUCTURE
|
||||||
|
===============
|
||||||
|
|
||||||
|
The CacheFiles module will create two directories in the directory it was
|
||||||
|
given:
|
||||||
|
|
||||||
|
(*) cache/
|
||||||
|
|
||||||
|
(*) graveyard/
|
||||||
|
|
||||||
|
The active cache objects all reside in the first directory. The CacheFiles
|
||||||
|
kernel module moves any retired or culled objects that it can't simply unlink
|
||||||
|
to the graveyard from which the daemon will actually delete them.
|
||||||
|
|
||||||
|
The daemon uses dnotify to monitor the graveyard directory, and will delete
|
||||||
|
anything that appears therein.
|
||||||
|
|
||||||
|
|
||||||
|
The module represents index objects as directories with the filename "I..." or
|
||||||
|
"J...". Note that the "cache/" directory is itself a special index.
|
||||||
|
|
||||||
|
Data objects are represented as files if they have no children, or directories
|
||||||
|
if they do. Their filenames all begin "D..." or "E...". If represented as a
|
||||||
|
directory, data objects will have a file in the directory called "data" that
|
||||||
|
actually holds the data.
|
||||||
|
|
||||||
|
Special objects are similar to data objects, except their filenames begin
|
||||||
|
"S..." or "T...".
|
||||||
|
|
||||||
|
|
||||||
|
If an object has children, then it will be represented as a directory.
|
||||||
|
Immediately in the representative directory are a collection of directories
|
||||||
|
named for hash values of the child object keys with an '@' prepended. Into
|
||||||
|
this directory, if possible, will be placed the representations of the child
|
||||||
|
objects:
|
||||||
|
|
||||||
|
INDEX INDEX INDEX DATA FILES
|
||||||
|
========= ========== ================================= ================
|
||||||
|
cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400
|
||||||
|
cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...DB1ry
|
||||||
|
cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...N22ry
|
||||||
|
cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...FP1ry
|
||||||
|
|
||||||
|
|
||||||
|
If the key is so long that it exceeds NAME_MAX with the decorations added on to
|
||||||
|
it, then it will be cut into pieces, the first few of which will be used to
|
||||||
|
make a nest of directories, and the last one of which will be the objects
|
||||||
|
inside the last directory. The names of the intermediate directories will have
|
||||||
|
'+' prepended:
|
||||||
|
|
||||||
|
J1223/@23/+xy...z/+kl...m/Epqr
|
||||||
|
|
||||||
|
|
||||||
|
Note that keys are raw data, and not only may they exceed NAME_MAX in size,
|
||||||
|
they may also contain things like '/' and NUL characters, and so they may not
|
||||||
|
be suitable for turning directly into a filename.
|
||||||
|
|
||||||
|
To handle this, CacheFiles will use a suitably printable filename directly and
|
||||||
|
"base-64" encode ones that aren't directly suitable. The two versions of
|
||||||
|
object filenames indicate the encoding:
|
||||||
|
|
||||||
|
OBJECT TYPE PRINTABLE ENCODED
|
||||||
|
=============== =============== ===============
|
||||||
|
Index "I..." "J..."
|
||||||
|
Data "D..." "E..."
|
||||||
|
Special "S..." "T..."
|
||||||
|
|
||||||
|
Intermediate directories are always "@" or "+" as appropriate.
|
||||||
|
|
||||||
|
|
||||||
|
Each object in the cache has an extended attribute label that holds the object
|
||||||
|
type ID (required to distinguish special objects) and the auxiliary data from
|
||||||
|
the netfs. The latter is used to detect stale objects in the cache and update
|
||||||
|
or retire them.
|
||||||
|
|
||||||
|
|
||||||
|
Note that CacheFiles will erase from the cache any file it doesn't recognise or
|
||||||
|
any file of an incorrect type (such as a FIFO file or a device file).
|
||||||
|
|
||||||
|
|
||||||
|
==========================
|
||||||
|
SECURITY MODEL AND SELINUX
|
||||||
|
==========================
|
||||||
|
|
||||||
|
CacheFiles is implemented to deal properly with the LSM security features of
|
||||||
|
the Linux kernel and the SELinux facility.
|
||||||
|
|
||||||
|
One of the problems that CacheFiles faces is that it is generally acting on
|
||||||
|
behalf of a process, and running in that process's context, and that includes a
|
||||||
|
security context that is not appropriate for accessing the cache - either
|
||||||
|
because the files in the cache are inaccessible to that process, or because if
|
||||||
|
the process creates a file in the cache, that file may be inaccessible to other
|
||||||
|
processes.
|
||||||
|
|
||||||
|
The way CacheFiles works is to temporarily change the security context (fsuid,
|
||||||
|
fsgid and actor security label) that the process acts as - without changing the
|
||||||
|
security context of the process when it is the target of an operation performed by
|
||||||
|
some other process (so signalling and suchlike still work correctly).
|
||||||
|
|
||||||
|
|
||||||
|
When the CacheFiles module is asked to bind to its cache, it:
|
||||||
|
|
||||||
|
(1) Finds the security label attached to the root cache directory and uses
|
||||||
|
that as the security label with which it will create files. By default,
|
||||||
|
this is:
|
||||||
|
|
||||||
|
cachefiles_var_t
|
||||||
|
|
||||||
|
(2) Finds the security label of the process which issued the bind request
|
||||||
|
(presumed to be the cachefilesd daemon), which by default will be:
|
||||||
|
|
||||||
|
cachefilesd_t
|
||||||
|
|
||||||
|
and asks LSM to supply a security ID as which it should act given the
|
||||||
|
daemon's label. By default, this will be:
|
||||||
|
|
||||||
|
cachefiles_kernel_t
|
||||||
|
|
||||||
|
SELinux transitions the daemon's security ID to the module's security ID
|
||||||
|
based on a rule of this form in the policy.
|
||||||
|
|
||||||
|
type_transition <daemon's-ID> kernel_t : process <module's-ID>;
|
||||||
|
|
||||||
|
For instance:
|
||||||
|
|
||||||
|
type_transition cachefilesd_t kernel_t : process cachefiles_kernel_t;
|
||||||
|
|
||||||
|
|
||||||
|
The module's security ID gives it permission to create, move and remove files
|
||||||
|
and directories in the cache, to find and access directories and files in the
|
||||||
|
cache, to set and access extended attributes on cache objects, and to read and
|
||||||
|
write files in the cache.
|
||||||
|
|
||||||
|
The daemon's security ID gives it only a very restricted set of permissions: it
|
||||||
|
may scan directories, stat files and erase files and directories. It may
|
||||||
|
not read or write files in the cache, and so it is precluded from accessing the
|
||||||
|
data cached therein; nor is it permitted to create new files in the cache.
|
||||||
|
|
||||||
|
|
||||||
|
There are policy source files available in:
|
||||||
|
|
||||||
|
http://people.redhat.com/~dhowells/fscache/cachefilesd-0.8.tar.bz2
|
||||||
|
|
||||||
|
and later versions. In that tarball, see the files:
|
||||||
|
|
||||||
|
cachefilesd.te
|
||||||
|
cachefilesd.fc
|
||||||
|
cachefilesd.if
|
||||||
|
|
||||||
|
They are built and installed directly by the RPM.
|
||||||
|
|
||||||
|
If a non-RPM based system is being used, then copy the above files to their own
|
||||||
|
directory and run:
|
||||||
|
|
||||||
|
make -f /usr/share/selinux/devel/Makefile
|
||||||
|
semodule -i cachefilesd.pp
|
||||||
|
|
||||||
|
You will need checkpolicy and selinux-policy-devel installed prior to the
|
||||||
|
build.
|
||||||
|
|
||||||
|
|
||||||
|
By default, the cache is located in /var/fscache, but if it is desirable that
|
||||||
|
it should be elsewhere, then either the above policy files must be altered, or
|
||||||
|
an auxiliary policy must be installed to label the alternate location of the
|
||||||
|
cache.
|
||||||
|
|
||||||
|
For instructions on how to add an auxiliary policy to enable the cache to be
|
||||||
|
located elsewhere when SELinux is in enforcing mode, please see:
|
||||||
|
|
||||||
|
/usr/share/doc/cachefilesd-*/move-cache.txt
|
||||||
|
|
||||||
|
when the cachefilesd rpm is installed; alternatively, the document can be found
|
||||||
|
in the sources.
|
||||||
|
|
||||||
|
|
||||||
|
==================
|
||||||
|
A NOTE ON SECURITY
|
||||||
|
==================
|
||||||
|
|
||||||
|
CacheFiles makes use of the split security in the task_struct. It allocates
|
||||||
|
its own task_security structure, and redirects current->act_as to point to it
|
||||||
|
when it acts on behalf of another process, in that process's context.
|
||||||
|
|
||||||
|
The reason it does this is that it calls vfs_mkdir() and suchlike rather than
|
||||||
|
bypassing security and calling inode ops directly. Therefore the VFS and LSM
|
||||||
|
may deny the CacheFiles access to the cache data because under some
|
||||||
|
circumstances the caching code is running in the security context of whatever
|
||||||
|
process issued the original syscall on the netfs.
|
||||||
|
|
||||||
|
Furthermore, should CacheFiles create a file or directory, the security
|
||||||
|
parameters with which that object is created (UID, GID, security label) would be
|
||||||
|
derived from that process that issued the system call, thus potentially
|
||||||
|
preventing other processes from accessing the cache - including CacheFiles's
|
||||||
|
cache management daemon (cachefilesd).
|
||||||
|
|
||||||
|
What is required is to temporarily override the security of the process that
|
||||||
|
issued the system call. We can't, however, just do an in-place change of the
|
||||||
|
security data as that affects the process as an object, not just as a subject.
|
||||||
|
This means it may lose signals or ptrace events for example, and affects what
|
||||||
|
the process looks like in /proc.
|
||||||
|
|
||||||
|
So CacheFiles makes use of a logical split in the security between the
|
||||||
|
objective security (task->sec) and the subjective security (task->act_as). The
|
||||||
|
objective security holds the intrinsic security properties of a process and is
|
||||||
|
never overridden. This is what appears in /proc, and is what is used when a
|
||||||
|
process is the target of an operation by some other process (SIGKILL for
|
||||||
|
example).
|
||||||
|
|
||||||
|
The subjective security holds the active security properties of a process, and
|
||||||
|
may be overridden. This is not seen externally, and is used when a process
|
||||||
|
acts upon another object, for example SIGKILLing another process or opening a
|
||||||
|
file.
|
||||||
|
|
||||||
|
LSM hooks exist that allow SELinux (or Smack or whatever) to reject a request
|
||||||
|
for CacheFiles to run in a context of a specific security label, or to create
|
||||||
|
files and directories with another security label.
|
||||||
|
|
||||||
|
|
||||||
|
=======================
|
||||||
|
STATISTICAL INFORMATION
|
||||||
|
=======================
|
||||||
|
|
||||||
|
If CacheFiles is compiled with the following option enabled:
|
||||||
|
|
||||||
|
CONFIG_CACHEFILES_HISTOGRAM=y
|
||||||
|
|
||||||
|
then it will gather certain statistics and display them through a proc file.
|
||||||
|
|
||||||
|
(*) /proc/fs/cachefiles/histogram
|
||||||
|
|
||||||
|
cat /proc/fs/cachefiles/histogram
|
||||||
|
JIFS SECS LOOKUPS MKDIRS CREATES
|
||||||
|
===== ===== ========= ========= =========
|
||||||
|
|
||||||
|
This shows the breakdown of the number of times each amount of time
|
||||||
|
between 0 jiffies and HZ-1 jiffies a variety of tasks took to run. The
|
||||||
|
columns are as follows:
|
||||||
|
|
||||||
|
COLUMN TIME MEASUREMENT
|
||||||
|
======= =======================================================
|
||||||
|
LOOKUPS Length of time to perform a lookup on the backing fs
|
||||||
|
MKDIRS Length of time to perform a mkdir on the backing fs
|
||||||
|
CREATES Length of time to perform a create on the backing fs
|
||||||
|
|
||||||
|
Each row shows the number of events that took a particular range of times.
|
||||||
|
Each step is 1 jiffy in size. The JIFS column indicates the particular
|
||||||
|
jiffy range covered, and the SECS field the equivalent number of seconds.
|
||||||
|
|
||||||
|
|
||||||
|
=========
|
||||||
|
DEBUGGING
|
||||||
|
=========
|
||||||
|
|
||||||
|
If CONFIG_CACHEFILES_DEBUG is enabled, the CacheFiles facility can have runtime
|
||||||
|
debugging enabled by adjusting the value in:
|
||||||
|
|
||||||
|
/sys/module/cachefiles/parameters/debug
|
||||||
|
|
||||||
|
This is a bitmask of debugging streams to enable:
|
||||||
|
|
||||||
|
BIT VALUE STREAM POINT
|
||||||
|
======= ======= =============================== =======================
|
||||||
|
0 1 General Function entry trace
|
||||||
|
1 2 Function exit trace
|
||||||
|
2 4 General
|
||||||
|
|
||||||
|
The appropriate set of values should be OR'd together and the result written to
|
||||||
|
the control file. For example:
|
||||||
|
|
||||||
|
echo $((1|4|8)) >/sys/module/cachefiles/parameters/debug
|
||||||
|
|
||||||
|
will turn on all function entry debugging.
|
333
Documentation/filesystems/caching/fscache.txt
Normal file
333
Documentation/filesystems/caching/fscache.txt
Normal file
|
@ -0,0 +1,333 @@
|
||||||
|
==========================
|
||||||
|
General Filesystem Caching
|
||||||
|
==========================
|
||||||
|
|
||||||
|
========
|
||||||
|
OVERVIEW
|
||||||
|
========
|
||||||
|
|
||||||
|
This facility is a general purpose cache for network filesystems, though it
|
||||||
|
could be used for caching other things such as ISO9660 filesystems too.
|
||||||
|
|
||||||
|
FS-Cache mediates between cache backends (such as CacheFS) and network
|
||||||
|
filesystems:
|
||||||
|
|
||||||
|
+---------+
|
||||||
|
| | +--------------+
|
||||||
|
| NFS |--+ | |
|
||||||
|
| | | +-->| CacheFS |
|
||||||
|
+---------+ | +----------+ | | /dev/hda5 |
|
||||||
|
| | | | +--------------+
|
||||||
|
+---------+ +-->| | |
|
||||||
|
| | | |--+
|
||||||
|
| AFS |----->| FS-Cache |
|
||||||
|
| | | |--+
|
||||||
|
+---------+ +-->| | |
|
||||||
|
| | | | +--------------+
|
||||||
|
+---------+ | +----------+ | | |
|
||||||
|
| | | +-->| CacheFiles |
|
||||||
|
| ISOFS |--+ | /var/cache |
|
||||||
|
| | +--------------+
|
||||||
|
+---------+
|
||||||
|
|
||||||
|
Or to look at it another way, FS-Cache is a module that provides a caching
|
||||||
|
facility to a network filesystem such that the cache is transparent to the
|
||||||
|
user:
|
||||||
|
|
||||||
|
+---------+
|
||||||
|
| |
|
||||||
|
| Server |
|
||||||
|
| |
|
||||||
|
+---------+
|
||||||
|
| NETWORK
|
||||||
|
~~~~~|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
|
||||||
|
| +----------+
|
||||||
|
V | |
|
||||||
|
+---------+ | |
|
||||||
|
| | | |
|
||||||
|
| NFS |----->| FS-Cache |
|
||||||
|
| | | |--+
|
||||||
|
+---------+ | | | +--------------+ +--------------+
|
||||||
|
| | | | | | | |
|
||||||
|
V +----------+ +-->| CacheFiles |-->| Ext3 |
|
||||||
|
+---------+ | /var/cache | | /dev/sda6 |
|
||||||
|
| | +--------------+ +--------------+
|
||||||
|
| VFS | ^ ^
|
||||||
|
| | | |
|
||||||
|
+---------+ +--------------+ |
|
||||||
|
| KERNEL SPACE | |
|
||||||
|
~~~~~|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~|~~~~~~|~~~~
|
||||||
|
| USER SPACE | |
|
||||||
|
V | |
|
||||||
|
+---------+ +--------------+
|
||||||
|
| | | |
|
||||||
|
| Process | | cachefilesd |
|
||||||
|
| | | |
|
||||||
|
+---------+ +--------------+
|
||||||
|
|
||||||
|
|
||||||
|
FS-Cache does not follow the idea of completely loading every netfs file
|
||||||
|
opened in its entirety into a cache before permitting it to be accessed and
|
||||||
|
then serving the pages out of that cache rather than the netfs inode because:
|
||||||
|
|
||||||
|
(1) It must be practical to operate without a cache.
|
||||||
|
|
||||||
|
(2) The size of any accessible file must not be limited to the size of the
|
||||||
|
cache.
|
||||||
|
|
||||||
|
(3) The combined size of all opened files (this includes mapped libraries)
|
||||||
|
must not be limited to the size of the cache.
|
||||||
|
|
||||||
|
(4) The user should not be forced to download an entire file just to do a
|
||||||
|
one-off access of a small portion of it (such as might be done with the
|
||||||
|
"file" program).
|
||||||
|
|
||||||
|
It instead serves the cache out in PAGE_SIZE chunks as and when requested by
|
||||||
|
the netfs('s) using it.
|
||||||
|
|
||||||
|
|
||||||
|
FS-Cache provides the following facilities:
|
||||||
|
|
||||||
|
(1) More than one cache can be used at once. Caches can be selected
|
||||||
|
explicitly by use of tags.
|
||||||
|
|
||||||
|
(2) Caches can be added / removed at any time.
|
||||||
|
|
||||||
|
(3) The netfs is provided with an interface that allows either party to
|
||||||
|
withdraw caching facilities from a file (required for (2)).
|
||||||
|
|
||||||
|
(4) The interface to the netfs returns as few errors as possible, preferring
|
||||||
|
rather to let the netfs remain oblivious.
|
||||||
|
|
||||||
|
(5) Cookies are used to represent indices, files and other objects to the
|
||||||
|
netfs. The simplest cookie is just a NULL pointer - indicating nothing
|
||||||
|
cached there.
|
||||||
|
|
||||||
|
(6) The netfs is allowed to propose - dynamically - any index hierarchy it
|
||||||
|
desires, though it must be aware that the index search function is
|
||||||
|
recursive, stack space is limited, and indices can only be children of
|
||||||
|
indices.
|
||||||
|
|
||||||
|
(7) Data I/O is done direct to and from the netfs's pages. The netfs
|
||||||
|
indicates that page A is at index B of the data-file represented by cookie
|
||||||
|
C, and that it should be read or written. The cache backend may or may
|
||||||
|
not start I/O on that page, but if it does, a netfs callback will be
|
||||||
|
invoked to indicate completion. The I/O may be either synchronous or
|
||||||
|
asynchronous.
|
||||||
|
|
||||||
|
(8) Cookies can be "retired" upon release. At this point FS-Cache will mark
|
||||||
|
them as obsolete and the index hierarchy rooted at that point will get
|
||||||
|
recycled.
|
||||||
|
|
||||||
|
(9) The netfs provides a "match" function for index searches. In addition to
|
||||||
|
saying whether a match was made or not, this can also specify that an
|
||||||
|
entry should be updated or deleted.
|
||||||
|
|
||||||
|
(10) As much as possible is done asynchronously.
|
||||||
|
|
||||||
|
|
||||||
|
FS-Cache maintains a virtual indexing tree in which all indices, files, objects
|
||||||
|
and pages are kept. Bits of this tree may actually reside in one or more
|
||||||
|
caches.
|
||||||
|
|
||||||
|
FSDEF
|
||||||
|
|
|
||||||
|
+------------------------------------+
|
||||||
|
| |
|
||||||
|
NFS AFS
|
||||||
|
| |
|
||||||
|
+--------------------------+ +-----------+
|
||||||
|
| | | |
|
||||||
|
homedir mirror afs.org redhat.com
|
||||||
|
| | |
|
||||||
|
+------------+ +---------------+ +----------+
|
||||||
|
| | | | | |
|
||||||
|
00001 00002 00007 00125 vol00001 vol00002
|
||||||
|
| | | | |
|
||||||
|
+---+---+ +-----+ +---+ +------+------+ +-----+----+
|
||||||
|
| | | | | | | | | | | | |
|
||||||
|
PG0 PG1 PG2 PG0 XATTR PG0 PG1 DIRENT DIRENT DIRENT R/W R/O Bak
|
||||||
|
| |
|
||||||
|
PG0 +-------+
|
||||||
|
| |
|
||||||
|
00001 00003
|
||||||
|
|
|
||||||
|
+---+---+
|
||||||
|
| | |
|
||||||
|
PG0 PG1 PG2
|
||||||
|
|
||||||
|
In the example above, you can see two netfs's being backed: NFS and AFS. These
|
||||||
|
have different index hierarchies:
|
||||||
|
|
||||||
|
(*) The NFS primary index contains per-server indices. Each server index is
|
||||||
|
indexed by NFS file handles to get data file objects. Each data file
|
||||||
|
object can have an array of pages, but may also have further child
|
||||||
|
objects, such as extended attributes and directory entries. Extended
|
||||||
|
attribute objects themselves have page-array contents.
|
||||||
|
|
||||||
|
(*) The AFS primary index contains per-cell indices. Each cell index contains
|
||||||
|
per-logical-volume indices. Each volume index contains up to three
|
||||||
|
indices for the read-write, read-only and backup mirrors of those volumes.
|
||||||
|
Each of these contains vnode data file objects, each of which contains an
|
||||||
|
array of pages.
|
||||||
|
|
||||||
|
The very top index is the FS-Cache master index in which individual netfs's
|
||||||
|
have entries.
|
||||||
|
|
||||||
|
Any index object may reside in more than one cache, provided it only has index
|
||||||
|
children. Any index with non-index object children will be assumed to only
|
||||||
|
reside in one cache.
|
||||||
|
|
||||||
|
|
||||||
|
The netfs API to FS-Cache can be found in:
|
||||||
|
|
||||||
|
Documentation/filesystems/caching/netfs-api.txt
|
||||||
|
|
||||||
|
The cache backend API to FS-Cache can be found in:
|
||||||
|
|
||||||
|
Documentation/filesystems/caching/backend-api.txt
|
||||||
|
|
||||||
|
A description of the internal representations and object state machine can be
|
||||||
|
found in:
|
||||||
|
|
||||||
|
Documentation/filesystems/caching/object.txt
|
||||||
|
|
||||||
|
|
||||||
|
=======================
|
||||||
|
STATISTICAL INFORMATION
|
||||||
|
=======================
|
||||||
|
|
||||||
|
If FS-Cache is compiled with the following options enabled:
|
||||||
|
|
||||||
|
CONFIG_FSCACHE_STATS=y
|
||||||
|
CONFIG_FSCACHE_HISTOGRAM=y
|
||||||
|
|
||||||
|
then it will gather certain statistics and display them through a number of
|
||||||
|
proc files.
|
||||||
|
|
||||||
|
(*) /proc/fs/fscache/stats
|
||||||
|
|
||||||
|
This shows counts of a number of events that can happen in FS-Cache:
|
||||||
|
|
||||||
|
CLASS EVENT MEANING
|
||||||
|
======= ======= =======================================================
|
||||||
|
Cookies idx=N Number of index cookies allocated
|
||||||
|
dat=N Number of data storage cookies allocated
|
||||||
|
spc=N Number of special cookies allocated
|
||||||
|
Objects alc=N Number of objects allocated
|
||||||
|
nal=N Number of object allocation failures
|
||||||
|
avl=N Number of objects that reached the available state
|
||||||
|
ded=N Number of objects that reached the dead state
|
||||||
|
ChkAux non=N Number of objects that didn't have a coherency check
|
||||||
|
ok=N Number of objects that passed a coherency check
|
||||||
|
upd=N Number of objects that needed a coherency data update
|
||||||
|
obs=N Number of objects that were declared obsolete
|
||||||
|
Pages mrk=N Number of pages marked as being cached
|
||||||
|
unc=N Number of uncache page requests seen
|
||||||
|
Acquire n=N Number of acquire cookie requests seen
|
||||||
|
nul=N Number of acq reqs given a NULL parent
|
||||||
|
noc=N Number of acq reqs rejected due to no cache available
|
||||||
|
ok=N Number of acq reqs succeeded
|
||||||
|
nbf=N Number of acq reqs rejected due to error
|
||||||
|
oom=N Number of acq reqs failed on ENOMEM
|
||||||
|
Lookups n=N Number of lookup calls made on cache backends
|
||||||
|
neg=N Number of negative lookups made
|
||||||
|
pos=N Number of positive lookups made
|
||||||
|
crt=N Number of objects created by lookup
|
||||||
|
Updates n=N Number of update cookie requests seen
|
||||||
|
nul=N Number of upd reqs given a NULL parent
|
||||||
|
run=N Number of upd reqs granted CPU time
|
||||||
|
Relinqs n=N Number of relinquish cookie requests seen
|
||||||
|
nul=N Number of rlq reqs given a NULL parent
|
||||||
|
wcr=N Number of rlq reqs waited on completion of creation
|
||||||
|
AttrChg n=N Number of attribute changed requests seen
|
||||||
|
ok=N Number of attr changed requests queued
|
||||||
|
nbf=N Number of attr changed rejected -ENOBUFS
|
||||||
|
oom=N Number of attr changed failed -ENOMEM
|
||||||
|
run=N Number of attr changed ops given CPU time
|
||||||
|
Allocs n=N Number of allocation requests seen
|
||||||
|
ok=N Number of successful alloc reqs
|
||||||
|
wt=N Number of alloc reqs that waited on lookup completion
|
||||||
|
nbf=N Number of alloc reqs rejected -ENOBUFS
|
||||||
|
ops=N Number of alloc reqs submitted
|
||||||
|
owt=N Number of alloc reqs waited for CPU time
|
||||||
|
Retrvls n=N Number of retrieval (read) requests seen
|
||||||
|
ok=N Number of successful retr reqs
|
||||||
|
wt=N Number of retr reqs that waited on lookup completion
|
||||||
|
nod=N Number of retr reqs returned -ENODATA
|
||||||
|
nbf=N Number of retr reqs rejected -ENOBUFS
|
||||||
|
int=N Number of retr reqs aborted -ERESTARTSYS
|
||||||
|
oom=N Number of retr reqs failed -ENOMEM
|
||||||
|
ops=N Number of retr reqs submitted
|
||||||
|
owt=N Number of retr reqs waited for CPU time
|
||||||
|
Stores n=N Number of storage (write) requests seen
|
||||||
|
ok=N Number of successful store reqs
|
||||||
|
agn=N Number of store reqs on a page already pending storage
|
||||||
|
nbf=N Number of store reqs rejected -ENOBUFS
|
||||||
|
oom=N Number of store reqs failed -ENOMEM
|
||||||
|
ops=N Number of store reqs submitted
|
||||||
|
run=N Number of store reqs granted CPU time
|
||||||
|
Ops pend=N Number of times async ops added to pending queues
|
||||||
|
run=N Number of times async ops given CPU time
|
||||||
|
enq=N Number of times async ops queued for processing
|
||||||
|
dfr=N Number of async ops queued for deferred release
|
||||||
|
rel=N Number of async ops released
|
||||||
|
gc=N Number of deferred-release async ops garbage collected
|
||||||
|
|
||||||
|
|
||||||
|
(*) /proc/fs/fscache/histogram
|
||||||
|
|
||||||
|
cat /proc/fs/fscache/histogram
|
||||||
|
JIFS SECS OBJ INST OP RUNS OBJ RUNS RETRV DLY RETRIEVLS
|
||||||
|
===== ===== ========= ========= ========= ========= =========
|
||||||
|
|
||||||
|
This shows the breakdown of the number of times each amount of time
|
||||||
|
between 0 jiffies and HZ-1 jiffies a variety of tasks took to run. The
|
||||||
|
columns are as follows:
|
||||||
|
|
||||||
|
COLUMN TIME MEASUREMENT
|
||||||
|
======= =======================================================
|
||||||
|
OBJ INST Length of time to instantiate an object
|
||||||
|
OP RUNS Length of time a call to process an operation took
|
||||||
|
OBJ RUNS Length of time a call to process an object event took
|
||||||
|
RETRV DLY Time between requesting a read and lookup completing
|
||||||
|
RETRIEVLS Time between beginning and end of a retrieval
|
||||||
|
|
||||||
|
Each row shows the number of events that took a particular range of times.
|
||||||
|
Each step is 1 jiffy in size. The JIFS column indicates the particular
|
||||||
|
jiffy range covered, and the SECS field the equivalent number of seconds.
|
||||||
|
|
||||||
|
|
||||||
|
=========
|
||||||
|
DEBUGGING
|
||||||
|
=========
|
||||||
|
|
||||||
|
If CONFIG_FSCACHE_DEBUG is enabled, the FS-Cache facility can have runtime
|
||||||
|
debugging enabled by adjusting the value in:
|
||||||
|
|
||||||
|
/sys/module/fscache/parameters/debug
|
||||||
|
|
||||||
|
This is a bitmask of debugging streams to enable:
|
||||||
|
|
||||||
|
BIT VALUE STREAM POINT
|
||||||
|
======= ======= =============================== =======================
|
||||||
|
0 1 Cache management Function entry trace
|
||||||
|
1 2 Function exit trace
|
||||||
|
2 4 General
|
||||||
|
3 8 Cookie management Function entry trace
|
||||||
|
4 16 Function exit trace
|
||||||
|
5 32 General
|
||||||
|
6 64 Page handling Function entry trace
|
||||||
|
7 128 Function exit trace
|
||||||
|
8 256 General
|
||||||
|
9 512 Operation management Function entry trace
|
||||||
|
10 1024 Function exit trace
|
||||||
|
11 2048 General
|
||||||
|
|
||||||
|
The appropriate set of values should be OR'd together and the result written to
|
||||||
|
the control file. For example:
|
||||||
|
|
||||||
|
echo $((1|8|64)) >/sys/module/fscache/parameters/debug
|
||||||
|
|
||||||
|
will turn on all function entry debugging.
|
778
Documentation/filesystems/caching/netfs-api.txt
Normal file
778
Documentation/filesystems/caching/netfs-api.txt
Normal file
|
@ -0,0 +1,778 @@
|
||||||
|
===============================
|
||||||
|
FS-CACHE NETWORK FILESYSTEM API
|
||||||
|
===============================
|
||||||
|
|
||||||
|
There's an API by which a network filesystem can make use of the FS-Cache
|
||||||
|
facilities. This is based around a number of principles:
|
||||||
|
|
||||||
|
(1) Caches can store a number of different object types. There are two main
|
||||||
|
object types: indices and files. The first is a special type used by
|
||||||
|
FS-Cache to make finding objects faster and to make retiring of groups of
|
||||||
|
objects easier.
|
||||||
|
|
||||||
|
(2) Every index, file or other object is represented by a cookie. This cookie
|
||||||
|
may or may not have anything associated with it, but the netfs doesn't
|
||||||
|
need to care.
|
||||||
|
|
||||||
|
(3) Barring the top-level index (one entry per cached netfs), the index
|
||||||
|
hierarchy for each netfs is structured according to the whim of the netfs.
|
||||||
|
|
||||||
|
This API is declared in <linux/fscache.h>.
|
||||||
|
|
||||||
|
This document contains the following sections:
|
||||||
|
|
||||||
|
(1) Network filesystem definition
|
||||||
|
(2) Index definition
|
||||||
|
(3) Object definition
|
||||||
|
(4) Network filesystem (un)registration
|
||||||
|
(5) Cache tag lookup
|
||||||
|
(6) Index registration
|
||||||
|
(7) Data file registration
|
||||||
|
(8) Miscellaneous object registration
|
||||||
|
(9) Setting the data file size
|
||||||
|
(10) Page alloc/read/write
|
||||||
|
(11) Page uncaching
|
||||||
|
(12) Index and data file update
|
||||||
|
(13) Miscellaneous cookie operations
|
||||||
|
(14) Cookie unregistration
|
||||||
|
(15) Index and data file invalidation
|
||||||
|
(16) FS-Cache specific page flags.
|
||||||
|
|
||||||
|
|
||||||
|
=============================
|
||||||
|
NETWORK FILESYSTEM DEFINITION
|
||||||
|
=============================
|
||||||
|
|
||||||
|
FS-Cache needs a description of the network filesystem. This is specified
|
||||||
|
using a record of the following structure:
|
||||||
|
|
||||||
|
struct fscache_netfs {
|
||||||
|
uint32_t version;
|
||||||
|
const char *name;
|
||||||
|
struct fscache_cookie *primary_index;
|
||||||
|
...
|
||||||
|
};
|
||||||
|
|
||||||
|
The first two fields should be filled in before registration, and the third
|
||||||
|
will be filled in by the registration function; any other fields should just be
|
||||||
|
ignored and are for internal use only.
|
||||||
|
|
||||||
|
The fields are:
|
||||||
|
|
||||||
|
(1) The name of the netfs (used as the key in the toplevel index).
|
||||||
|
|
||||||
|
(2) The version of the netfs (if the name matches but the version doesn't, the
|
||||||
|
entire in-cache hierarchy for this netfs will be scrapped and begun
|
||||||
|
afresh).
|
||||||
|
|
||||||
|
(3) The cookie representing the primary index will be allocated according to
|
||||||
|
another parameter passed into the registration function.
|
||||||
|
|
||||||
|
For example, kAFS (linux/fs/afs/) uses the following definitions to describe
|
||||||
|
itself:
|
||||||
|
|
||||||
|
struct fscache_netfs afs_cache_netfs = {
|
||||||
|
.version = 0,
|
||||||
|
.name = "afs",
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
================
|
||||||
|
INDEX DEFINITION
|
||||||
|
================
|
||||||
|
|
||||||
|
Indices are used for two purposes:
|
||||||
|
|
||||||
|
(1) To aid the finding of a file based on a series of keys (such as AFS's
|
||||||
|
"cell", "volume ID", "vnode ID").
|
||||||
|
|
||||||
|
(2) To make it easier to discard a subset of all the files cached based around
|
||||||
|
a particular key - for instance to mirror the removal of an AFS volume.
|
||||||
|
|
||||||
|
However, since it's unlikely that any two netfs's are going to want to define
|
||||||
|
their index hierarchies in quite the same way, FS-Cache tries to impose as few
|
||||||
|
restraints as possible on how an index is structured and where it is placed in
|
||||||
|
the tree. The netfs can even mix indices and data files at the same level, but
|
||||||
|
it's not recommended.
|
||||||
|
|
||||||
|
Each index entry consists of a key of indeterminate length plus some auxiliary
|
||||||
|
data, also of indeterminate length.
|
||||||
|
|
||||||
|
There are some limits on indices:
|
||||||
|
|
||||||
|
(1) Any index containing non-index objects should be restricted to a single
|
||||||
|
cache. Any such objects created within an index will be created in the
|
||||||
|
first cache only. The cache in which an index is created can be
|
||||||
|
controlled by cache tags (see below).
|
||||||
|
|
||||||
|
(2) The entry data must be atomically journallable, so it is limited to about
|
||||||
|
400 bytes at present. At least 400 bytes will be available.
|
||||||
|
|
||||||
|
(3) The depth of the index tree should be judged with care as the search
|
||||||
|
function is recursive. Too many layers will run the kernel out of stack.
|
||||||
|
|
||||||
|
|
||||||
|
=================
|
||||||
|
OBJECT DEFINITION
|
||||||
|
=================
|
||||||
|
|
||||||
|
To define an object, a structure of the following type should be filled out:
|
||||||
|
|
||||||
|
struct fscache_cookie_def
|
||||||
|
{
|
||||||
|
uint8_t name[16];
|
||||||
|
uint8_t type;
|
||||||
|
|
||||||
|
struct fscache_cache_tag *(*select_cache)(
|
||||||
|
const void *parent_netfs_data,
|
||||||
|
const void *cookie_netfs_data);
|
||||||
|
|
||||||
|
uint16_t (*get_key)(const void *cookie_netfs_data,
|
||||||
|
void *buffer,
|
||||||
|
uint16_t bufmax);
|
||||||
|
|
||||||
|
void (*get_attr)(const void *cookie_netfs_data,
|
||||||
|
uint64_t *size);
|
||||||
|
|
||||||
|
uint16_t (*get_aux)(const void *cookie_netfs_data,
|
||||||
|
void *buffer,
|
||||||
|
uint16_t bufmax);
|
||||||
|
|
||||||
|
enum fscache_checkaux (*check_aux)(void *cookie_netfs_data,
|
||||||
|
const void *data,
|
||||||
|
uint16_t datalen);
|
||||||
|
|
||||||
|
void (*get_context)(void *cookie_netfs_data, void *context);
|
||||||
|
|
||||||
|
void (*put_context)(void *cookie_netfs_data, void *context);
|
||||||
|
|
||||||
|
void (*mark_pages_cached)(void *cookie_netfs_data,
|
||||||
|
struct address_space *mapping,
|
||||||
|
struct pagevec *cached_pvec);
|
||||||
|
|
||||||
|
void (*now_uncached)(void *cookie_netfs_data);
|
||||||
|
};
|
||||||
|
|
||||||
|
This has the following fields:
|
||||||
|
|
||||||
|
(1) The type of the object [mandatory].
|
||||||
|
|
||||||
|
This is one of the following values:
|
||||||
|
|
||||||
|
(*) FSCACHE_COOKIE_TYPE_INDEX
|
||||||
|
|
||||||
|
This defines an index, which is a special FS-Cache type.
|
||||||
|
|
||||||
|
(*) FSCACHE_COOKIE_TYPE_DATAFILE
|
||||||
|
|
||||||
|
This defines an ordinary data file.
|
||||||
|
|
||||||
|
(*) Any other value between 2 and 255
|
||||||
|
|
||||||
|
This defines an extraordinary object such as an XATTR.
|
||||||
|
|
||||||
|
(2) The name of the object type (NUL terminated unless all 16 chars are used)
|
||||||
|
[optional].
|
||||||
|
|
||||||
|
(3) A function to select the cache in which to store an index [optional].
|
||||||
|
|
||||||
|
This function is invoked when an index needs to be instantiated in a cache
|
||||||
|
during the instantiation of a non-index object. Only the immediate index
|
||||||
|
parent for the non-index object will be queried. Any indices above that
|
||||||
|
in the hierarchy may be stored in multiple caches. This function does not
|
||||||
|
need to be supplied for any non-index object or any index that will only
|
||||||
|
have index children.
|
||||||
|
|
||||||
|
If this function is not supplied or if it returns NULL then the first
|
||||||
|
cache in the parent's list will be chosen, or failing that, the first
|
||||||
|
cache in the master list.
|
||||||
|
|
||||||
|
(4) A function to retrieve an object's key from the netfs [mandatory].
|
||||||
|
|
||||||
|
This function will be called with the netfs data that was passed to the
|
||||||
|
cookie acquisition function and the maximum length of key data that it may
|
||||||
|
provide. It should write the required key data into the given buffer and
|
||||||
|
return the quantity it wrote.
|
||||||
|
|
||||||
|
(5) A function to retrieve attribute data from the netfs [optional].
|
||||||
|
|
||||||
|
This function will be called with the netfs data that was passed to the
|
||||||
|
cookie acquisition function. It should return the size of the file if
|
||||||
|
this is a data file. The size may be used to govern how much cache must
|
||||||
|
be reserved for this file in the cache.
|
||||||
|
|
||||||
|
If the function is absent, a file size of 0 is assumed.
|
||||||
|
|
||||||
|
(6) A function to retrieve auxiliary data from the netfs [optional].
|
||||||
|
|
||||||
|
This function will be called with the netfs data that was passed to the
|
||||||
|
cookie acquisition function and the maximum length of auxiliary data that
|
||||||
|
it may provide. It should write the auxiliary data into the given buffer
|
||||||
|
and return the quantity it wrote.
|
||||||
|
|
||||||
|
If this function is absent, the auxiliary data length will be set to 0.
|
||||||
|
|
||||||
|
The length of the auxiliary data buffer may be dependent on the key
|
||||||
|
length. A netfs mustn't rely on being able to provide more than 400 bytes
|
||||||
|
for both.
|
||||||
|
|
||||||
|
(7) A function to check the auxiliary data [optional].
|
||||||
|
|
||||||
|
This function will be called to check that a match found in the cache for
|
||||||
|
this object is valid. For instance with AFS it could check the auxiliary
|
||||||
|
data against the data version number returned by the server to determine
|
||||||
|
whether the index entry in a cache is still valid.
|
||||||
|
|
||||||
|
If this function is absent, it will be assumed that matching objects in a
|
||||||
|
cache are always valid.
|
||||||
|
|
||||||
|
If present, the function should return one of the following values:
|
||||||
|
|
||||||
|
(*) FSCACHE_CHECKAUX_OKAY - the entry is okay as is
|
||||||
|
(*) FSCACHE_CHECKAUX_NEEDS_UPDATE - the entry requires update
|
||||||
|
(*) FSCACHE_CHECKAUX_OBSOLETE - the entry should be deleted
|
||||||
|
|
||||||
|
This function can also be used to extract data from the auxiliary data in
|
||||||
|
the cache and copy it into the netfs's structures.
|
||||||
|
|
||||||
|
(8) A pair of functions to manage contexts for the completion callback
|
||||||
|
[optional].
|
||||||
|
|
||||||
|
The cache read/write functions are passed a context which is then passed
|
||||||
|
to the I/O completion callback function. To ensure this context remains
|
||||||
|
valid until after the I/O completion is called, two functions may be
|
||||||
|
provided: one to get an extra reference on the context, and one to drop a
|
||||||
|
reference to it.
|
||||||
|
|
||||||
|
If the context is not used or is a type of object that won't go out of
|
||||||
|
scope, then these functions are not required. These functions are not
|
||||||
|
required for indices as indices may not contain data. These functions may
|
||||||
|
be called in interrupt context and so may not sleep.
|
||||||
|
|
||||||
|
(9) A function to mark a page as retaining cache metadata [optional].
|
||||||
|
|
||||||
|
This is called by the cache to indicate that it is retaining in-memory
|
||||||
|
information for this page and that the netfs should uncache the page when
|
||||||
|
it has finished. This does not indicate whether there's data on the disk
|
||||||
|
or not. Note that several pages at once may be presented for marking.
|
||||||
|
|
||||||
|
The PG_fscache bit is set on the pages before this function would be
|
||||||
|
called, so the function need not be provided if this is sufficient.
|
||||||
|
|
||||||
|
This function is not required for indices as they're not permitted data.
|
||||||
|
|
||||||
|
(10) A function to unmark all the pages retaining cache metadata [mandatory].
|
||||||
|
|
||||||
|
This is called by FS-Cache to indicate that a backing store is being
|
||||||
|
unbound from a cookie and that all the marks on the pages should be
|
||||||
|
cleared to prevent confusion. Note that the cache will have torn down all
|
||||||
|
its tracking information so that the pages don't need to be explicitly
|
||||||
|
uncached.
|
||||||
|
|
||||||
|
This function is not required for indices as they're not permitted data.
|
||||||
|
|
||||||
|
|
||||||
|
===================================
|
||||||
|
NETWORK FILESYSTEM (UN)REGISTRATION
|
||||||
|
===================================
|
||||||
|
|
||||||
|
The first step is to declare the network filesystem to the cache. This also
|
||||||
|
involves specifying the layout of the primary index (for AFS, this would be the
|
||||||
|
"cell" level).
|
||||||
|
|
||||||
|
The registration function is:
|
||||||
|
|
||||||
|
int fscache_register_netfs(struct fscache_netfs *netfs);
|
||||||
|
|
||||||
|
It just takes a pointer to the netfs definition. It returns 0 or an error as
|
||||||
|
appropriate.
|
||||||
|
|
||||||
|
For kAFS, registration is done as follows:
|
||||||
|
|
||||||
|
ret = fscache_register_netfs(&afs_cache_netfs);
|
||||||
|
|
||||||
|
The last step is, of course, unregistration:
|
||||||
|
|
||||||
|
void fscache_unregister_netfs(struct fscache_netfs *netfs);
|
||||||
|
|
||||||
|
|
||||||
|
================
|
||||||
|
CACHE TAG LOOKUP
|
||||||
|
================
|
||||||
|
|
||||||
|
FS-Cache permits the use of more than one cache. To permit particular index
|
||||||
|
subtrees to be bound to particular caches, the second step is to look up cache
|
||||||
|
representation tags. This step is optional; it can be left entirely up to
|
||||||
|
FS-Cache as to which cache should be used. The problem with doing that is that
|
||||||
|
FS-Cache will always pick the first cache that was registered.
|
||||||
|
|
||||||
|
To get the representation for a named tag:
|
||||||
|
|
||||||
|
struct fscache_cache_tag *fscache_lookup_cache_tag(const char *name);
|
||||||
|
|
||||||
|
This takes a text string as the name and returns a representation of a tag. It
|
||||||
|
will never return an error. It may return a dummy tag, however, if it runs out
|
||||||
|
of memory; this will inhibit caching with this tag.
|
||||||
|
|
||||||
|
Any representation so obtained must be released by passing it to this function:
|
||||||
|
|
||||||
|
void fscache_release_cache_tag(struct fscache_cache_tag *tag);
|
||||||
|
|
||||||
|
The tag will be retrieved by FS-Cache when it calls the object definition
|
||||||
|
operation select_cache().
|
||||||
|
|
||||||
|
|
||||||
|
==================
|
||||||
|
INDEX REGISTRATION
|
||||||
|
==================
|
||||||
|
|
||||||
|
The third step is to inform FS-Cache about part of an index hierarchy that can
|
||||||
|
be used to locate files. This is done by requesting a cookie for each index in
|
||||||
|
the path to the file:
|
||||||
|
|
||||||
|
struct fscache_cookie *
|
||||||
|
fscache_acquire_cookie(struct fscache_cookie *parent,
|
||||||
|
const struct fscache_cookie_def *def,
|
||||||
|
void *netfs_data);
|
||||||
|
|
||||||
|
This function creates an index entry in the index represented by parent,
|
||||||
|
filling in the index entry by calling the operations pointed to by def.
|
||||||
|
|
||||||
|
Note that this function never returns an error - all errors are handled
|
||||||
|
internally. It may, however, return NULL to indicate no cookie. It is quite
|
||||||
|
acceptable to pass this token back to this function as the parent to another
|
||||||
|
acquisition (or even to the relinquish cookie, read page and write page
|
||||||
|
functions - see below).
|
||||||
|
|
||||||
|
Note also that no indices are actually created in a cache until a non-index
|
||||||
|
object needs to be created somewhere down the hierarchy. Furthermore, an index
|
||||||
|
may be created in several different caches independently at different times.
|
||||||
|
This is all handled transparently, and the netfs doesn't see any of it.
|
||||||
|
|
||||||
|
For example, with AFS, a cell would be added to the primary index. This index
|
||||||
|
entry would have a dependent inode containing a volume location index for the
|
||||||
|
volume mappings within this cell:
|
||||||
|
|
||||||
|
cell->cache =
|
||||||
|
fscache_acquire_cookie(afs_cache_netfs.primary_index,
|
||||||
|
&afs_cell_cache_index_def,
|
||||||
|
cell);
|
||||||
|
|
||||||
|
Then when a volume location was accessed, it would be entered into the cell's
|
||||||
|
index and an inode would be allocated that acts as a volume type and hash chain
|
||||||
|
combination:
|
||||||
|
|
||||||
|
vlocation->cache =
|
||||||
|
fscache_acquire_cookie(cell->cache,
|
||||||
|
&afs_vlocation_cache_index_def,
|
||||||
|
vlocation);
|
||||||
|
|
||||||
|
And then a particular flavour of volume (R/O for example) could be added to
|
||||||
|
that index, creating another index for vnodes (AFS inode equivalents):
|
||||||
|
|
||||||
|
volume->cache =
|
||||||
|
fscache_acquire_cookie(vlocation->cache,
|
||||||
|
&afs_volume_cache_index_def,
|
||||||
|
volume);
|
||||||
|
|
||||||
|
|
||||||
|
======================
|
||||||
|
DATA FILE REGISTRATION
|
||||||
|
======================
|
||||||
|
|
||||||
|
The fourth step is to request a data file be created in the cache. This is
|
||||||
|
identical to index cookie acquisition. The only difference is that the type in
|
||||||
|
the object definition should be something other than index type.
|
||||||
|
|
||||||
|
vnode->cache =
|
||||||
|
fscache_acquire_cookie(volume->cache,
|
||||||
|
&afs_vnode_cache_object_def,
|
||||||
|
vnode);
|
||||||
|
|
||||||
|
|
||||||
|
=================================
|
||||||
|
MISCELLANEOUS OBJECT REGISTRATION
|
||||||
|
=================================
|
||||||
|
|
||||||
|
An optional step is to request an object of miscellaneous type be created in
|
||||||
|
the cache. This is almost identical to index cookie acquisition. The only
|
||||||
|
difference is that the type in the object definition should be something other
|
||||||
|
than index type. Whilst the parent object could be an index, it's more likely
|
||||||
|
it would be some other type of object such as a data file.
|
||||||
|
|
||||||
|
xattr->cache =
|
||||||
|
fscache_acquire_cookie(vnode->cache,
|
||||||
|
&afs_xattr_cache_object_def,
|
||||||
|
xattr);
|
||||||
|
|
||||||
|
Miscellaneous objects might be used to store extended attributes or directory
|
||||||
|
entries for example.
|
||||||
|
|
||||||
|
|
||||||
|
==========================
|
||||||
|
SETTING THE DATA FILE SIZE
|
||||||
|
==========================
|
||||||
|
|
||||||
|
The fifth step is to set the physical attributes of the file, such as its size.
|
||||||
|
This doesn't automatically reserve any space in the cache, but permits the
|
||||||
|
cache to adjust its metadata for data tracking appropriately:
|
||||||
|
|
||||||
|
int fscache_attr_changed(struct fscache_cookie *cookie);
|
||||||
|
|
||||||
|
The cache will return -ENOBUFS if there is no backing cache or if there is no
|
||||||
|
space to allocate any extra metadata required in the cache. The attributes
|
||||||
|
will be accessed with the get_attr() cookie definition operation.
|
||||||
|
|
||||||
|
Note that attempts to read or write data pages in the cache over this size may
|
||||||
|
be rebuffed with -ENOBUFS.
|
||||||
|
|
||||||
|
This operation schedules an attribute adjustment to happen asynchronously at
|
||||||
|
some point in the future, and as such, it may happen after the function returns
|
||||||
|
to the caller. The attribute adjustment excludes read and write operations.
|
||||||
|
|
||||||
|
|
||||||
|
=====================
|
||||||
|
PAGE READ/ALLOC/WRITE
|
||||||
|
=====================
|
||||||
|
|
||||||
|
And the sixth step is to store and retrieve pages in the cache. There are
|
||||||
|
three functions that are used to do this.
|
||||||
|
|
||||||
|
Note:
|
||||||
|
|
||||||
|
(1) A page should not be re-read or re-allocated without uncaching it first.
|
||||||
|
|
||||||
|
(2) A read or allocated page must be uncached when the netfs page is released
|
||||||
|
from the pagecache.
|
||||||
|
|
||||||
|
(3) A page should only be written to the cache if previously read or allocated.
|
||||||
|
|
||||||
|
This permits the cache to maintain its page tracking in proper order.
|
||||||
|
|
||||||
|
|
||||||
|
PAGE READ
|
||||||
|
---------
|
||||||
|
|
||||||
|
Firstly, the netfs should ask FS-Cache to examine the caches and read the
|
||||||
|
contents cached for a particular page of a particular file if present, or else
|
||||||
|
allocate space to store the contents if not:
|
||||||
|
|
||||||
|
typedef
|
||||||
|
void (*fscache_rw_complete_t)(struct page *page,
|
||||||
|
void *context,
|
||||||
|
int error);
|
||||||
|
|
||||||
|
int fscache_read_or_alloc_page(struct fscache_cookie *cookie,
|
||||||
|
struct page *page,
|
||||||
|
fscache_rw_complete_t end_io_func,
|
||||||
|
void *context,
|
||||||
|
gfp_t gfp);
|
||||||
|
|
||||||
|
The cookie argument must specify a cookie for an object that isn't an index,
|
||||||
|
the page specified will have the data loaded into it (and is also used to
|
||||||
|
specify the page number), and the gfp argument is used to control how any
|
||||||
|
memory allocations made are satisfied.
|
||||||
|
|
||||||
|
If the cookie indicates the inode is not cached:
|
||||||
|
|
||||||
|
(1) The function will return -ENOBUFS.
|
||||||
|
|
||||||
|
Else if there's a copy of the page resident in the cache:
|
||||||
|
|
||||||
|
(1) The mark_pages_cached() cookie operation will be called on that page.
|
||||||
|
|
||||||
|
(2) The function will submit a request to read the data from the cache's
|
||||||
|
backing device directly into the page specified.
|
||||||
|
|
||||||
|
(3) The function will return 0.
|
||||||
|
|
||||||
|
(4) When the read is complete, end_io_func() will be invoked with:
|
||||||
|
|
||||||
|
(*) The netfs data supplied when the cookie was created.
|
||||||
|
|
||||||
|
(*) The page descriptor.
|
||||||
|
|
||||||
|
(*) The context argument passed to the above function. This will be
|
||||||
|
maintained with the get_context/put_context functions mentioned above.
|
||||||
|
|
||||||
|
(*) An argument that's 0 on success or negative for an error code.
|
||||||
|
|
||||||
|
If an error occurs, it should be assumed that the page contains no usable
|
||||||
|
data.
|
||||||
|
|
||||||
|
end_io_func() will be called in process context if the read results in
|
||||||
|
an error, but it might be called in interrupt context if the read is
|
||||||
|
successful.
|
||||||
|
|
||||||
|
Otherwise, if there's not a copy available in cache, but the cache may be able
|
||||||
|
to store the page:
|
||||||
|
|
||||||
|
(1) The mark_pages_cached() cookie operation will be called on that page.
|
||||||
|
|
||||||
|
(2) A block may be reserved in the cache and attached to the object at the
|
||||||
|
appropriate place.
|
||||||
|
|
||||||
|
(3) The function will return -ENODATA.
|
||||||
|
|
||||||
|
This function may also return -ENOMEM or -EINTR, in which case it won't have
|
||||||
|
read any data from the cache.
|
||||||
|
|
||||||
|
|
||||||
|
PAGE ALLOCATE
|
||||||
|
-------------
|
||||||
|
|
||||||
|
Alternatively, if there's not expected to be any data in the cache for a page
|
||||||
|
because the file has been extended, a block can simply be allocated instead:
|
||||||
|
|
||||||
|
int fscache_alloc_page(struct fscache_cookie *cookie,
|
||||||
|
struct page *page,
|
||||||
|
gfp_t gfp);
|
||||||
|
|
||||||
|
This is similar to the fscache_read_or_alloc_page() function, except that it
|
||||||
|
never reads from the cache. It will return 0 if a block has been allocated,
|
||||||
|
rather than -ENODATA as the other would. One or the other must be performed
|
||||||
|
before writing to the cache.
|
||||||
|
|
||||||
|
The mark_pages_cached() cookie operation will be called on the page if
|
||||||
|
successful.
|
||||||
|
|
||||||
|
|
||||||
|
PAGE WRITE
|
||||||
|
----------
|
||||||
|
|
||||||
|
Secondly, if the netfs changes the contents of the page (either due to an
|
||||||
|
initial download or if a user performs a write), then the page should be
|
||||||
|
written back to the cache:
|
||||||
|
|
||||||
|
int fscache_write_page(struct fscache_cookie *cookie,
|
||||||
|
struct page *page,
|
||||||
|
gfp_t gfp);
|
||||||
|
|
||||||
|
The cookie argument must specify a data file cookie, the page specified should
|
||||||
|
contain the data to be written (and is also used to specify the page number),
|
||||||
|
and the gfp argument is used to control how any memory allocations made are
|
||||||
|
satisfied.
|
||||||
|
|
||||||
|
The page must have first been read or allocated successfully and must not have
|
||||||
|
been uncached before writing is performed.
|
||||||
|
|
||||||
|
If the cookie indicates the inode is not cached then:
|
||||||
|
|
||||||
|
(1) The function will return -ENOBUFS.
|
||||||
|
|
||||||
|
Else if space can be allocated in the cache to hold this page:
|
||||||
|
|
||||||
|
(1) PG_fscache_write will be set on the page.
|
||||||
|
|
||||||
|
(2) The function will submit a request to write the data to the cache's backing
|
||||||
|
device directly from the page specified.
|
||||||
|
|
||||||
|
(3) The function will return 0.
|
||||||
|
|
||||||
|
(4) When the write is complete PG_fscache_write is cleared on the page and
|
||||||
|
anyone waiting for that bit will be woken up.
|
||||||
|
|
||||||
|
Else if there's no space available in the cache, -ENOBUFS will be returned. It
|
||||||
|
is also possible for the PG_fscache_write bit to be cleared when no write took
|
||||||
|
place if unforeseen circumstances arose (such as a disk error).
|
||||||
|
|
||||||
|
Writing takes place asynchronously.
|
||||||
|
|
||||||
|
|
||||||
|
MULTIPLE PAGE READ
|
||||||
|
------------------
|
||||||
|
|
||||||
|
A facility is provided to read several pages at once, as requested by the
|
||||||
|
readpages() address space operation:
|
||||||
|
|
||||||
|
int fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
|
||||||
|
struct address_space *mapping,
|
||||||
|
struct list_head *pages,
|
||||||
|
int *nr_pages,
|
||||||
|
fscache_rw_complete_t end_io_func,
|
||||||
|
void *context,
|
||||||
|
gfp_t gfp);
|
||||||
|
|
||||||
|
This works in a similar way to fscache_read_or_alloc_page(), except:
|
||||||
|
|
||||||
|
(1) Any page it can retrieve data for is removed from pages and nr_pages and
|
||||||
|
dispatched for reading to the disk. Reads of adjacent pages on disk may
|
||||||
|
be merged for greater efficiency.
|
||||||
|
|
||||||
|
(2) The mark_pages_cached() cookie operation will be called on several pages
|
||||||
|
at once if they're being read or allocated.
|
||||||
|
|
||||||
|
(3) If there was a general error, then that error will be returned.
|
||||||
|
|
||||||
|
Else if some pages couldn't be allocated or read, then -ENOBUFS will be
|
||||||
|
returned.
|
||||||
|
|
||||||
|
Else if some pages couldn't be read but were allocated, then -ENODATA will
|
||||||
|
be returned.
|
||||||
|
|
||||||
|
Otherwise, if all pages had reads dispatched, then 0 will be returned, the
|
||||||
|
list will be empty and *nr_pages will be 0.
|
||||||
|
|
||||||
|
(4) end_io_func will be called once for each page being read as the reads
|
||||||
|
complete. It will be called in process context if error != 0, but it may
|
||||||
|
be called in interrupt context if there is no error.
|
||||||
|
|
||||||
|
Note that a return of -ENODATA, -ENOBUFS or any other error does not preclude
|
||||||
|
some of the pages being read and some being allocated. Those pages will have
|
||||||
|
been marked appropriately and will need uncaching.
|
||||||
|
|
||||||
|
|
||||||
|
==============
|
||||||
|
PAGE UNCACHING
|
||||||
|
==============
|
||||||
|
|
||||||
|
To uncache a page, this function should be called:
|
||||||
|
|
||||||
|
void fscache_uncache_page(struct fscache_cookie *cookie,
|
||||||
|
struct page *page);
|
||||||
|
|
||||||
|
This function permits the cache to release any in-memory representation it
|
||||||
|
might be holding for this netfs page. This function must be called once for
|
||||||
|
each page on which the read or write page functions above have been called to
|
||||||
|
make sure the cache's in-memory tracking information gets torn down.
|
||||||
|
|
||||||
|
Note that pages can't be explicitly deleted from a data file. The whole
|
||||||
|
data file must be retired (see the relinquish cookie function below).
|
||||||
|
|
||||||
|
Furthermore, note that this does not cancel the asynchronous read or write
|
||||||
|
operation started by the read/alloc and write functions, so the page
|
||||||
|
invalidation and release functions must use:
|
||||||
|
|
||||||
|
bool fscache_check_page_write(struct fscache_cookie *cookie,
|
||||||
|
struct page *page);
|
||||||
|
|
||||||
|
to see if a page is being written to the cache, and:
|
||||||
|
|
||||||
|
void fscache_wait_on_page_write(struct fscache_cookie *cookie,
|
||||||
|
struct page *page);
|
||||||
|
|
||||||
|
to wait for it to finish if it is.
|
||||||
|
|
||||||
|
|
||||||
|
==========================
|
||||||
|
INDEX AND DATA FILE UPDATE
|
||||||
|
==========================
|
||||||
|
|
||||||
|
To request an update of the index data for an index or other object, the
|
||||||
|
following function should be called:
|
||||||
|
|
||||||
|
void fscache_update_cookie(struct fscache_cookie *cookie);
|
||||||
|
|
||||||
|
This function will refer back to the netfs_data pointer stored in the cookie by
|
||||||
|
the acquisition function to obtain the data to write into each revised index
|
||||||
|
entry. The update method in the parent index definition will be called to
|
||||||
|
transfer the data.
|
||||||
|
|
||||||
|
Note that partial updates may happen automatically at other times, such as when
|
||||||
|
data blocks are added to a data file object.
|
||||||
|
|
||||||
|
|
||||||
|
===============================
|
||||||
|
MISCELLANEOUS COOKIE OPERATIONS
|
||||||
|
===============================
|
||||||
|
|
||||||
|
There are a number of operations that can be used to control cookies:
|
||||||
|
|
||||||
|
(*) Cookie pinning:
|
||||||
|
|
||||||
|
int fscache_pin_cookie(struct fscache_cookie *cookie);
|
||||||
|
void fscache_unpin_cookie(struct fscache_cookie *cookie);
|
||||||
|
|
||||||
|
These operations permit data cookies to be pinned into the cache and to
|
||||||
|
have the pinning removed. They are not permitted on index cookies.
|
||||||
|
|
||||||
|
The pinning function will return 0 if successful, -ENOBUFS if the cookie
|
||||||
|
isn't backed by a cache, -EOPNOTSUPP if the cache doesn't support pinning,
|
||||||
|
-ENOSPC if there isn't enough space to honour the operation, -ENOMEM or
|
||||||
|
-EIO if there's any other problem.
|
||||||
|
|
||||||
|
(*) Data space reservation:
|
||||||
|
|
||||||
|
int fscache_reserve_space(struct fscache_cookie *cookie, loff_t size);
|
||||||
|
|
||||||
|
This permits a netfs to request cache space be reserved to store up to the
|
||||||
|
given amount of a file. It is permitted to ask for more than the current
|
||||||
|
size of the file to allow for future file expansion.
|
||||||
|
|
||||||
|
If size is given as zero then the reservation will be cancelled.
|
||||||
|
|
||||||
|
The function will return 0 if successful, -ENOBUFS if the cookie isn't
|
||||||
|
backed by a cache, -EOPNOTSUPP if the cache doesn't support reservations,
|
||||||
|
-ENOSPC if there isn't enough space to honour the operation, -ENOMEM or
|
||||||
|
-EIO if there's any other problem.
|
||||||
|
|
||||||
|
Note that this doesn't pin an object in a cache; it can still be culled to
|
||||||
|
make space if it's not in use.
|
||||||
|
|
||||||
|
|
||||||
|
=====================
|
||||||
|
COOKIE UNREGISTRATION
|
||||||
|
=====================
|
||||||
|
|
||||||
|
To get rid of a cookie, this function should be called.
|
||||||
|
|
||||||
|
void fscache_relinquish_cookie(struct fscache_cookie *cookie,
|
||||||
|
int retire);
|
||||||
|
|
||||||
|
If retire is non-zero, then the object will be marked for recycling, and all
|
||||||
|
copies of it will be removed from all active caches in which it is present.
|
||||||
|
Not only that but all child objects will also be retired.
|
||||||
|
|
||||||
|
If retire is zero, then the object may be available again when next the
|
||||||
|
acquisition function is called. Retirement here will overrule the pinning on a
|
||||||
|
cookie.
|
||||||
|
|
||||||
|
One very important note - relinquish must NOT be called for a cookie unless all
|
||||||
|
the cookies for "child" indices, objects and pages have been relinquished
|
||||||
|
first.
|
||||||
|
|
||||||
|
|
||||||
|
================================
|
||||||
|
INDEX AND DATA FILE INVALIDATION
|
||||||
|
================================
|
||||||
|
|
||||||
|
There is no direct way to invalidate an index subtree or a data file. To do
|
||||||
|
this, the caller should relinquish and retire the cookie they have, and then
|
||||||
|
acquire a new one.
|
||||||
|
|
||||||
|
|
||||||
|
===========================
|
||||||
|
FS-CACHE SPECIFIC PAGE FLAG
|
||||||
|
===========================
|
||||||
|
|
||||||
|
FS-Cache makes use of a page flag, PG_private_2, for its own purpose. This is
|
||||||
|
given the alternative name PG_fscache.
|
||||||
|
|
||||||
|
PG_fscache is used to indicate that the page is known by the cache, and that
|
||||||
|
the cache must be informed if the page is going to go away. It's an indication
|
||||||
|
to the netfs that the cache has an interest in this page, where an interest may
|
||||||
|
be a pointer to it, resources allocated or reserved for it, or I/O in progress
|
||||||
|
upon it.
|
||||||
|
|
||||||
|
The netfs can use this information in methods such as releasepage() to
|
||||||
|
determine whether it needs to uncache a page or update it.
|
||||||
|
|
||||||
|
Furthermore, if this bit is set, releasepage() and invalidatepage() operations
|
||||||
|
will be called on a page to get rid of it, even if PG_private is not set. This
|
||||||
|
allows caching to be attempted on a page before read_cache_pages() is called
|
||||||
|
after fscache_read_or_alloc_pages() as the former will try and release pages it
|
||||||
|
was given under certain circumstances.
|
||||||
|
|
||||||
|
This bit does not overlap with bits such as PG_private. This means that FS-Cache
|
||||||
|
can be used with a filesystem that uses the block buffering code.
|
||||||
|
|
||||||
|
There are a number of operations defined on this flag:
|
||||||
|
|
||||||
|
int PageFsCache(struct page *page);
|
||||||
|
void SetPageFsCache(struct page *page)
|
||||||
|
void ClearPageFsCache(struct page *page)
|
||||||
|
int TestSetPageFsCache(struct page *page)
|
||||||
|
int TestClearPageFsCache(struct page *page)
|
||||||
|
|
||||||
|
These functions are bit test, bit set, bit clear, bit test and set and bit
|
||||||
|
test and clear operations on PG_fscache.
|
313
Documentation/filesystems/caching/object.txt
Normal file
313
Documentation/filesystems/caching/object.txt
Normal file
|
@ -0,0 +1,313 @@
|
||||||
|
====================================================
|
||||||
|
IN-KERNEL CACHE OBJECT REPRESENTATION AND MANAGEMENT
|
||||||
|
====================================================
|
||||||
|
|
||||||
|
By: David Howells <dhowells@redhat.com>
|
||||||
|
|
||||||
|
Contents:
|
||||||
|
|
||||||
|
(*) Representation
|
||||||
|
|
||||||
|
(*) Object management state machine.
|
||||||
|
|
||||||
|
- Provision of cpu time.
|
||||||
|
- Locking simplification.
|
||||||
|
|
||||||
|
(*) The set of states.
|
||||||
|
|
||||||
|
(*) The set of events.
|
||||||
|
|
||||||
|
|
||||||
|
==============
|
||||||
|
REPRESENTATION
|
||||||
|
==============
|
||||||
|
|
||||||
|
FS-Cache maintains an in-kernel representation of each object that a netfs is
|
||||||
|
currently interested in. Such objects are represented by the fscache_cookie
|
||||||
|
struct and are referred to as cookies.
|
||||||
|
|
||||||
|
FS-Cache also maintains a separate in-kernel representation of the objects that
|
||||||
|
a cache backend is currently actively caching. Such objects are represented by
|
||||||
|
the fscache_object struct. The cache backends allocate these upon request, and
|
||||||
|
are expected to embed them in their own representations. These are referred to
|
||||||
|
as objects.
|
||||||
|
|
||||||
|
There is a 1:N relationship between cookies and objects. A cookie may be
|
||||||
|
represented by multiple objects - an index may exist in more than one cache -
|
||||||
|
or even by no objects (it may not be cached).
|
||||||
|
|
||||||
|
Furthermore, both cookies and objects are hierarchical. The two hierarchies
|
||||||
|
correspond, but the cookies tree is a superset of the union of the object trees
|
||||||
|
of multiple caches:
|
||||||
|
|
||||||
|
NETFS INDEX TREE : CACHE 1 : CACHE 2
|
||||||
|
: :
|
||||||
|
: +-----------+ :
|
||||||
|
+----------->| IObject | :
|
||||||
|
+-----------+ | : +-----------+ :
|
||||||
|
| ICookie |-------+ : | :
|
||||||
|
+-----------+ | : | : +-----------+
|
||||||
|
| +------------------------------>| IObject |
|
||||||
|
| : | : +-----------+
|
||||||
|
| : V : |
|
||||||
|
| : +-----------+ : |
|
||||||
|
V +----------->| IObject | : |
|
||||||
|
+-----------+ | : +-----------+ : |
|
||||||
|
| ICookie |-------+ : | : V
|
||||||
|
+-----------+ | : | : +-----------+
|
||||||
|
| +------------------------------>| IObject |
|
||||||
|
+-----+-----+ : | : +-----------+
|
||||||
|
| | : | : |
|
||||||
|
V | : V : |
|
||||||
|
+-----------+ | : +-----------+ : |
|
||||||
|
| ICookie |------------------------->| IObject | : |
|
||||||
|
+-----------+ | : +-----------+ : |
|
||||||
|
| V : | : V
|
||||||
|
| +-----------+ : | : +-----------+
|
||||||
|
| | ICookie |-------------------------------->| IObject |
|
||||||
|
| +-----------+ : | : +-----------+
|
||||||
|
V | : V : |
|
||||||
|
+-----------+ | : +-----------+ : |
|
||||||
|
| DCookie |------------------------->| DObject | : |
|
||||||
|
+-----------+ | : +-----------+ : |
|
||||||
|
| : : |
|
||||||
|
+-------+-------+ : : |
|
||||||
|
| | : : |
|
||||||
|
V V : : V
|
||||||
|
+-----------+ +-----------+ : : +-----------+
|
||||||
|
| DCookie | | DCookie |------------------------>| DObject |
|
||||||
|
+-----------+ +-----------+ : : +-----------+
|
||||||
|
: :
|
||||||
|
|
||||||
|
In the above illustration, ICookie and IObject represent indices and DCookie
|
||||||
|
and DObject represent data storage objects. Indices may have representation in
|
||||||
|
multiple caches, but currently, non-index objects may not. Objects of any type
|
||||||
|
may also be entirely unrepresented.
|
||||||
|
|
||||||
|
As far as the netfs API goes, the netfs is only actually permitted to see
|
||||||
|
pointers to the cookies. The cookies themselves and any objects attached to
|
||||||
|
those cookies are hidden from it.
|
||||||
|
|
||||||
|
|
||||||
|
===============================
|
||||||
|
OBJECT MANAGEMENT STATE MACHINE
|
||||||
|
===============================
|
||||||
|
|
||||||
|
Within FS-Cache, each active object is managed by its own individual state
|
||||||
|
machine. The state for an object is kept in the fscache_object struct, in
|
||||||
|
object->state. A cookie may point to a set of objects that are in different
|
||||||
|
states.
|
||||||
|
|
||||||
|
Each state has an action associated with it that is invoked when the machine
|
||||||
|
wakes up in that state. There are four logical sets of states:
|
||||||
|
|
||||||
|
(1) Preparation: states that wait for the parent objects to become ready. The
|
||||||
|
representations are hierarchical, and it is expected that an object must
|
||||||
|
be created or accessed with respect to its parent object.
|
||||||
|
|
||||||
|
(2) Initialisation: states that perform lookups in the cache and validate
|
||||||
|
what's found and that create on disk any missing metadata.
|
||||||
|
|
||||||
|
(3) Normal running: states that allow netfs operations on objects to proceed
|
||||||
|
and that update the state of objects.
|
||||||
|
|
||||||
|
(4) Termination: states that detach objects from their netfs cookies, that
|
||||||
|
delete objects from disk, that handle disk and system errors and that free
|
||||||
|
up in-memory resources.
|
||||||
|
|
||||||
|
|
||||||
|
In most cases, transitioning between states is in response to signalled events.
|
||||||
|
When a state has finished processing, it will usually set the mask of events in
|
||||||
|
which it is interested (object->event_mask) and relinquish the worker thread.
|
||||||
|
Then when an event is raised (by calling fscache_raise_event()), if the event
|
||||||
|
is not masked, the object will be queued for processing (by calling
|
||||||
|
fscache_enqueue_object()).
|
||||||
|
|
||||||
|
|
||||||
|
PROVISION OF CPU TIME
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
The work to be done by the various states is given CPU time by the threads of
|
||||||
|
the slow work facility (see Documentation/slow-work.txt). This is used in
|
||||||
|
preference to the workqueue facility because:
|
||||||
|
|
||||||
|
(1) Threads may be completely occupied for very long periods of time by a
|
||||||
|
particular work item. These state actions may be doing sequences of
|
||||||
|
synchronous, journalled disk accesses (lookup, mkdir, create, setxattr,
|
||||||
|
getxattr, truncate, unlink, rmdir, rename).
|
||||||
|
|
||||||
|
(2) Threads may do little actual work, but may rather spend a lot of time
|
||||||
|
sleeping on I/O. This means that single-threaded and 1-per-CPU-threaded
|
||||||
|
workqueues don't necessarily have the right numbers of threads.
|
||||||
|
|
||||||
|
|
||||||
|
LOCKING SIMPLIFICATION
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
Because only one worker thread may be operating on any particular object's
|
||||||
|
state machine at once, this simplifies the locking, particularly with respect
|
||||||
|
to disconnecting the netfs's representation of a cache object (fscache_cookie)
|
||||||
|
from the cache backend's representation (fscache_object) - which may be
|
||||||
|
requested from either end.
|
||||||
|
|
||||||
|
|
||||||
|
=================
|
||||||
|
THE SET OF STATES
|
||||||
|
=================
|
||||||
|
|
||||||
|
The object state machine has a set of states that it can be in. There are
|
||||||
|
preparation states in which the object sets itself up and waits for its parent
|
||||||
|
object to transit to a state that allows access to its children:
|
||||||
|
|
||||||
|
(1) State FSCACHE_OBJECT_INIT.
|
||||||
|
|
||||||
|
Initialise the object and wait for the parent object to become active. In
|
||||||
|
the cache, it is expected that it will not be possible to look an object
|
||||||
|
up from the parent object, until that parent object itself has been looked
|
||||||
|
up.
|
||||||
|
|
||||||
|
There are initialisation states in which the object sets itself up and accesses
|
||||||
|
disk for the object metadata:
|
||||||
|
|
||||||
|
(2) State FSCACHE_OBJECT_LOOKING_UP.
|
||||||
|
|
||||||
|
Look up the object on disk, using the parent as a starting point.
|
||||||
|
FS-Cache expects the cache backend to probe the cache to see whether this
|
||||||
|
object is represented there, and if it is, to see if it's valid (coherency
|
||||||
|
management).
|
||||||
|
|
||||||
|
The cache should call fscache_object_lookup_negative() to indicate lookup
|
||||||
|
failure for whatever reason, and should call fscache_obtained_object() to
|
||||||
|
indicate success.
|
||||||
|
|
||||||
|
At the completion of lookup, FS-Cache will let the netfs go ahead with
|
||||||
|
read operations, no matter whether the file is yet cached. If not yet
|
||||||
|
cached, read operations will be immediately rejected with ENODATA until
|
||||||
|
the first known page is uncached - as up to that point there can be no data
|
||||||
|
to be read out of the cache for that file that isn't currently also held
|
||||||
|
in the pagecache.
|
||||||
|
|
||||||
|
(3) State FSCACHE_OBJECT_CREATING.
|
||||||
|
|
||||||
|
Create an object on disk, using the parent as a starting point. This
|
||||||
|
happens if the lookup failed to find the object, or if the object's
|
||||||
|
coherency data indicated what's on disk is out of date. In this state,
|
||||||
|
FS-Cache expects the cache to create the object.
|
||||||
|
|
||||||
|
The cache should call fscache_obtained_object() if creation completes
|
||||||
|
successfully, fscache_object_lookup_negative() otherwise.
|
||||||
|
|
||||||
|
At the completion of creation, FS-Cache will start processing write
|
||||||
|
operations the netfs has queued for an object. If creation failed, the
|
||||||
|
write ops will be transparently discarded, and nothing recorded in the
|
||||||
|
cache.
|
||||||
|
|
||||||
|
There are some normal running states in which the object spends its time
|
||||||
|
servicing netfs requests:
|
||||||
|
|
||||||
|
(4) State FSCACHE_OBJECT_AVAILABLE.
|
||||||
|
|
||||||
|
A transient state in which pending operations are started, child objects
|
||||||
|
are permitted to advance from FSCACHE_OBJECT_INIT state, and temporary
|
||||||
|
lookup data is freed.
|
||||||
|
|
||||||
|
(5) State FSCACHE_OBJECT_ACTIVE.
|
||||||
|
|
||||||
|
The normal running state. In this state, requests the netfs makes will be
|
||||||
|
passed on to the cache.
|
||||||
|
|
||||||
|
(6) State FSCACHE_OBJECT_UPDATING.
|
||||||
|
|
||||||
|
The state machine comes here to update the object in the cache from the
|
||||||
|
netfs's records. This involves updating the auxiliary data that is used
|
||||||
|
to maintain coherency.
|
||||||
|
|
||||||
|
And there are terminal states in which an object cleans itself up, deallocates
|
||||||
|
memory and potentially deletes stuff from disk:
|
||||||
|
|
||||||
|
(7) State FSCACHE_OBJECT_LC_DYING.
|
||||||
|
|
||||||
|
The object comes here if it is dying because of a lookup or creation
|
||||||
|
error. This would be due to a disk error or system error of some sort.
|
||||||
|
Temporary data is cleaned up, and the parent is released.
|
||||||
|
|
||||||
|
(8) State FSCACHE_OBJECT_DYING.
|
||||||
|
|
||||||
|
The object comes here if it is dying due to an error, because its parent
|
||||||
|
cookie has been relinquished by the netfs or because the cache is being
|
||||||
|
withdrawn.
|
||||||
|
|
||||||
|
Any child objects waiting on this one are given CPU time so that they too
|
||||||
|
can destroy themselves. This object waits for all its children to go away
|
||||||
|
before advancing to the next state.
|
||||||
|
|
||||||
|
(9) State FSCACHE_OBJECT_ABORT_INIT.
|
||||||
|
|
||||||
|
The object comes to this state if it was waiting on its parent in
|
||||||
|
FSCACHE_OBJECT_INIT, but its parent died. The object will destroy itself
|
||||||
|
so that the parent may proceed from the FSCACHE_OBJECT_DYING state.
|
||||||
|
|
||||||
|
(10) State FSCACHE_OBJECT_RELEASING.
|
||||||
|
(11) State FSCACHE_OBJECT_RECYCLING.
|
||||||
|
|
||||||
|
The object comes to one of these two states when dying once it is rid of
|
||||||
|
all its children, if it is dying because the netfs relinquished its
|
||||||
|
cookie. In the first state, the cached data is expected to persist, and
|
||||||
|
in the second it will be deleted.
|
||||||
|
|
||||||
|
(12) State FSCACHE_OBJECT_WITHDRAWING.
|
||||||
|
|
||||||
|
The object transits to this state if the cache decides it wants to
|
||||||
|
withdraw the object from service, perhaps to make space, but also due to
|
||||||
|
error or just because the whole cache is being withdrawn.
|
||||||
|
|
||||||
|
(13) State FSCACHE_OBJECT_DEAD.
|
||||||
|
|
||||||
|
The object transits to this state when the in-memory object record is
|
||||||
|
ready to be deleted. The object processor shouldn't ever see an object in
|
||||||
|
this state.
|
||||||
|
|
||||||
|
|
||||||
|
THE SET OF EVENTS
|
||||||
|
-----------------
|
||||||
|
|
||||||
|
There are a number of events that can be raised to an object state machine:
|
||||||
|
|
||||||
|
(*) FSCACHE_OBJECT_EV_UPDATE
|
||||||
|
|
||||||
|
The netfs requested that an object be updated. The state machine will ask
|
||||||
|
the cache backend to update the object, and the cache backend will ask the
|
||||||
|
netfs for details of the change through its cookie definition ops.
|
||||||
|
|
||||||
|
(*) FSCACHE_OBJECT_EV_CLEARED
|
||||||
|
|
||||||
|
This is signalled in two circumstances:
|
||||||
|
|
||||||
|
(a) when an object's last child object is dropped and
|
||||||
|
|
||||||
|
(b) when the last operation outstanding on an object is completed.
|
||||||
|
|
||||||
|
This is used to proceed from the dying state.
|
||||||
|
|
||||||
|
(*) FSCACHE_OBJECT_EV_ERROR
|
||||||
|
|
||||||
|
This is signalled when an I/O error occurs during the processing of some
|
||||||
|
object.
|
||||||
|
|
||||||
|
(*) FSCACHE_OBJECT_EV_RELEASE
|
||||||
|
(*) FSCACHE_OBJECT_EV_RETIRE
|
||||||
|
|
||||||
|
These are signalled when the netfs relinquishes a cookie it was using.
|
||||||
|
The event selected depends on whether the netfs asks for the backing
|
||||||
|
object to be retired (deleted) or retained.
|
||||||
|
|
||||||
|
(*) FSCACHE_OBJECT_EV_WITHDRAW
|
||||||
|
|
||||||
|
This is signalled when the cache backend wants to withdraw an object.
|
||||||
|
This means that the object will have to be detached from the netfs's
|
||||||
|
cookie.
|
||||||
|
|
||||||
|
Because the withdrawing and releasing/retiring events are all handled by the object
|
||||||
|
state machine, it doesn't matter if there's a collision with both ends trying
|
||||||
|
to sever the connection at the same time. The state machine can just pick
|
||||||
|
which one it wants to honour, and that effects the other.
|
213
Documentation/filesystems/caching/operations.txt
Normal file
213
Documentation/filesystems/caching/operations.txt
Normal file
|
@ -0,0 +1,213 @@
|
||||||
|
================================
|
||||||
|
ASYNCHRONOUS OPERATIONS HANDLING
|
||||||
|
================================
|
||||||
|
|
||||||
|
By: David Howells <dhowells@redhat.com>
|
||||||
|
|
||||||
|
Contents:
|
||||||
|
|
||||||
|
(*) Overview.
|
||||||
|
|
||||||
|
(*) Operation record initialisation.
|
||||||
|
|
||||||
|
(*) Parameters.
|
||||||
|
|
||||||
|
(*) Procedure.
|
||||||
|
|
||||||
|
(*) Asynchronous callback.
|
||||||
|
|
||||||
|
|
||||||
|
========
|
||||||
|
OVERVIEW
|
||||||
|
========
|
||||||
|
|
||||||
|
FS-Cache has an asynchronous operations handling facility that it uses for its
|
||||||
|
data storage and retrieval routines. Its operations are represented by
|
||||||
|
fscache_operation structs, though these are usually embedded into some other
|
||||||
|
structure.
|
||||||
|
|
||||||
|
This facility is available to and expected to be used by the cache backends,
|
||||||
|
and FS-Cache will create operations and pass them off to the appropriate cache
|
||||||
|
backend for completion.
|
||||||
|
|
||||||
|
To make use of this facility, <linux/fscache-cache.h> should be #included.
|
||||||
|
|
||||||
|
|
||||||
|
===============================
|
||||||
|
OPERATION RECORD INITIALISATION
|
||||||
|
===============================
|
||||||
|
|
||||||
|
An operation is recorded in an fscache_operation struct:
|
||||||
|
|
||||||
|
struct fscache_operation {
|
||||||
|
union {
|
||||||
|
struct work_struct fast_work;
|
||||||
|
struct slow_work slow_work;
|
||||||
|
};
|
||||||
|
unsigned long flags;
|
||||||
|
fscache_operation_processor_t processor;
|
||||||
|
...
|
||||||
|
};
|
||||||
|
|
||||||
|
Someone wanting to issue an operation should allocate something with this
|
||||||
|
struct embedded in it. They should initialise it by calling:
|
||||||
|
|
||||||
|
void fscache_operation_init(struct fscache_operation *op,
|
||||||
|
fscache_operation_release_t release);
|
||||||
|
|
||||||
|
with the operation to be initialised and the release function to use.
|
||||||
|
|
||||||
|
The op->flags parameter should be set to indicate the CPU time provision and
|
||||||
|
the exclusivity (see the Parameters section).
|
||||||
|
|
||||||
|
The op->fast_work, op->slow_work and op->processor fields should be set as
|
||||||
|
appropriate for the CPU time provision (see the Parameters section).
|
||||||
|
|
||||||
|
FSCACHE_OP_WAITING may be set in op->flags prior to each submission of the
|
||||||
|
operation and waited for afterwards.
|
||||||
|
|
||||||
|
|
||||||
|
==========
|
||||||
|
PARAMETERS
|
||||||
|
==========
|
||||||
|
|
||||||
|
There are a number of parameters that can be set in the operation record's flag
|
||||||
|
parameter. There are three options for the provision of CPU time in these
|
||||||
|
operations:
|
||||||
|
|
||||||
|
(1) The operation may be done synchronously (FSCACHE_OP_MYTHREAD). A thread
|
||||||
|
may decide it wants to handle an operation itself without deferring it to
|
||||||
|
another thread.
|
||||||
|
|
||||||
|
This is, for example, used in read operations for calling readpages() on
|
||||||
|
the backing filesystem in CacheFiles. Although readpages() does an
|
||||||
|
asynchronous data fetch, the determination of whether pages exist is done
|
||||||
|
synchronously - and the netfs does not proceed until this has been
|
||||||
|
determined.
|
||||||
|
|
||||||
|
If this option is to be used, FSCACHE_OP_WAITING must be set in op->flags
|
||||||
|
before submitting the operation, and the operating thread must wait for it
|
||||||
|
to be cleared before proceeding:
|
||||||
|
|
||||||
|
wait_on_bit(&op->flags, FSCACHE_OP_WAITING,
|
||||||
|
fscache_wait_bit, TASK_UNINTERRUPTIBLE);
|
||||||
|
|
||||||
|
|
||||||
|
(2) The operation may be fast asynchronous (FSCACHE_OP_FAST), in which case it
|
||||||
|
will be given to keventd to process. Such an operation is not permitted
|
||||||
|
to sleep on I/O.
|
||||||
|
|
||||||
|
This is, for example, used by CacheFiles to copy data from a backing fs
|
||||||
|
page to a netfs page after the backing fs has read the page in.
|
||||||
|
|
||||||
|
If this option is used, op->fast_work and op->processor must be
|
||||||
|
initialised before submitting the operation:
|
||||||
|
|
||||||
|
INIT_WORK(&op->fast_work, do_some_work);
|
||||||
|
|
||||||
|
|
||||||
|
(3) The operation may be slow asynchronous (FSCACHE_OP_SLOW), in which case it
|
||||||
|
will be given to the slow work facility to process. Such an operation is
|
||||||
|
permitted to sleep on I/O.
|
||||||
|
|
||||||
|
This is, for example, used by FS-Cache to handle background writes of
|
||||||
|
pages that have just been fetched from a remote server.
|
||||||
|
|
||||||
|
If this option is used, op->slow_work and op->processor must be
|
||||||
|
initialised before submitting the operation:
|
||||||
|
|
||||||
|
fscache_operation_init_slow(op, processor)
|
||||||
|
|
||||||
|
|
||||||
|
Furthermore, operations may be one of two types:
|
||||||
|
|
||||||
|
(1) Exclusive (FSCACHE_OP_EXCLUSIVE). Operations of this type may not run in
|
||||||
|
conjunction with any other operation on the object being operated upon.
|
||||||
|
|
||||||
|
An example of this is the attribute change operation, in which the file
|
||||||
|
being written to may need truncation.
|
||||||
|
|
||||||
|
(2) Shareable. Operations of this type may be running simultaneously. It's
|
||||||
|
up to the operation implementation to prevent interference between other
|
||||||
|
operations running at the same time.
|
||||||
|
|
||||||
|
|
||||||
|
=========
|
||||||
|
PROCEDURE
|
||||||
|
=========
|
||||||
|
|
||||||
|
Operations are used through the following procedure:
|
||||||
|
|
||||||
|
(1) The submitting thread must allocate the operation and initialise it
|
||||||
|
itself. Normally this would be part of a more specific structure with the
|
||||||
|
generic op embedded within.
|
||||||
|
|
||||||
|
(2) The submitting thread must then submit the operation for processing using
|
||||||
|
one of the following two functions:
|
||||||
|
|
||||||
|
int fscache_submit_op(struct fscache_object *object,
|
||||||
|
struct fscache_operation *op);
|
||||||
|
|
||||||
|
int fscache_submit_exclusive_op(struct fscache_object *object,
|
||||||
|
struct fscache_operation *op);
|
||||||
|
|
||||||
|
The first function should be used to submit non-exclusive ops and the
|
||||||
|
second to submit exclusive ones. The caller must still set the
|
||||||
|
FSCACHE_OP_EXCLUSIVE flag.
|
||||||
|
|
||||||
|
If successful, both functions will assign the operation to the specified
|
||||||
|
object and return 0. -ENOBUFS will be returned if the object specified is
|
||||||
|
permanently unavailable.
|
||||||
|
|
||||||
|
The operation manager will defer operations on an object that is still
|
||||||
|
undergoing lookup or creation. The operation will also be deferred if an
|
||||||
|
operation of conflicting exclusivity is in progress on the object.
|
||||||
|
|
||||||
|
If the operation is asynchronous, the manager will retain a reference to
|
||||||
|
it, so the caller should put their reference to it by passing it to:
|
||||||
|
|
||||||
|
void fscache_put_operation(struct fscache_operation *op);
|
||||||
|
|
||||||
|
(3) If the submitting thread wants to do the work itself, and has marked the
|
||||||
|
operation with FSCACHE_OP_MYTHREAD, then it should monitor
|
||||||
|
FSCACHE_OP_WAITING as described above and check the state of the object if
|
||||||
|
necessary (the object might have died whilst the thread was waiting).
|
||||||
|
|
||||||
|
When it has finished doing its processing, it should call
|
||||||
|
fscache_put_operation() on it.
|
||||||
|
|
||||||
|
(4) The operation holds an effective lock upon the object, preventing other
|
||||||
|
exclusive ops conflicting until it is released. The operation can be
|
||||||
|
enqueued for further immediate asynchronous processing by adjusting the
|
||||||
|
CPU time provisioning option if necessary, eg:
|
||||||
|
|
||||||
|
op->flags &= ~FSCACHE_OP_TYPE;
|
||||||
|
op->flags |= FSCACHE_OP_FAST;
|
||||||
|
|
||||||
|
and calling:
|
||||||
|
|
||||||
|
void fscache_enqueue_operation(struct fscache_operation *op)
|
||||||
|
|
||||||
|
This can be used to allow other things to have use of the worker thread
|
||||||
|
pools.
|
||||||
|
|
||||||
|
|
||||||
|
=====================
|
||||||
|
ASYNCHRONOUS CALLBACK
|
||||||
|
=====================
|
||||||
|
|
||||||
|
When used in asynchronous mode, the worker thread pool will invoke the
|
||||||
|
processor method with a pointer to the operation. This should then get at the
|
||||||
|
container struct by using container_of():
|
||||||
|
|
||||||
|
static void fscache_write_op(struct fscache_operation *_op)
|
||||||
|
{
|
||||||
|
struct fscache_storage *op =
|
||||||
|
container_of(_op, struct fscache_storage, op);
|
||||||
|
...
|
||||||
|
}
|
||||||
|
|
||||||
|
The caller holds a reference on the operation, and will invoke
|
||||||
|
fscache_put_operation() when the processor function returns. The processor
|
||||||
|
function is at liberty to call fscache_enqueue_operation() or to take extra
|
||||||
|
references.
|
176
Documentation/filesystems/exofs.txt
Normal file
176
Documentation/filesystems/exofs.txt
Normal file
|
@ -0,0 +1,176 @@
|
||||||
|
===============================================================================
|
||||||
|
WHAT IS EXOFS?
|
||||||
|
===============================================================================
|
||||||
|
|
||||||
|
exofs is a file system that uses an OSD and exports the API of a normal Linux
|
||||||
|
file system. Users access exofs like any other local file system, and exofs
|
||||||
|
will in turn issue commands to the local OSD initiator.
|
||||||
|
|
||||||
|
OSD is a new T10 command set that views storage devices not as a large/flat
|
||||||
|
array of sectors but as a container of objects, each having a length, quota,
|
||||||
|
time attributes and more. Each object is addressed by a 64bit ID, and is
|
||||||
|
contained in a 64bit ID partition. Each object has associated attributes
|
||||||
|
attached to it, which are an integral part of the object and provide metadata about
|
||||||
|
the object. The standard defines some common obligatory attributes, but user
|
||||||
|
attributes can be added as needed.
|
||||||
|
|
||||||
|
===============================================================================
|
||||||
|
ENVIRONMENT
|
||||||
|
===============================================================================
|
||||||
|
|
||||||
|
To use this file system, you need to have an object store to run it on. You
|
||||||
|
may download a target from:
|
||||||
|
http://open-osd.org
|
||||||
|
|
||||||
|
See Documentation/scsi/osd.txt for how to setup a working osd environment.
|
||||||
|
|
||||||
|
===============================================================================
|
||||||
|
USAGE
|
||||||
|
===============================================================================
|
||||||
|
|
||||||
|
1. Download and compile exofs and open-osd initiator:
|
||||||
|
You need an external Kernel source tree or kernel headers from your
|
||||||
|
distribution. (anything based on 2.6.26 or later).
|
||||||
|
|
||||||
|
a. download open-osd including exofs source using:
|
||||||
|
[parent-directory]$ git clone git://git.open-osd.org/open-osd.git
|
||||||
|
|
||||||
|
b. Build the library module like this:
|
||||||
|
[parent-directory]$ make -C KSRC=$(KER_DIR) open-osd
|
||||||
|
|
||||||
|
This will build both the open-osd initiator as well as the exofs kernel
|
||||||
|
module. Use whatever parameters you compiled your Kernel with and
|
||||||
|
$(KER_DIR) above pointing to the Kernel you compile against. See the file
|
||||||
|
open-osd/top-level-Makefile for an example.
|
||||||
|
|
||||||
|
2. Get the OSD initiator and target set up properly, and login to the target.
|
||||||
|
See Documentation/scsi/osd.txt for further instructions. Also see ./do-osd
|
||||||
|
for an example script that does all these steps.
|
||||||
|
|
||||||
|
3. Insmod the exofs.ko module:
|
||||||
|
[exofs]$ insmod exofs.ko
|
||||||
|
|
||||||
|
4. Make sure the directory where you want to mount exists. If not, create it.
|
||||||
|
(For example, mkdir /mnt/exofs)
|
||||||
|
|
||||||
|
5. At first run you will need to invoke the mkfs.exofs application
|
||||||
|
|
||||||
|
As an example, this will create the file system on:
|
||||||
|
/dev/osd0 partition ID 65536
|
||||||
|
|
||||||
|
mkfs.exofs --pid=65536 --format /dev/osd0
|
||||||
|
|
||||||
|
The --format is optional; if not specified, no OSD_FORMAT will be
|
||||||
|
performed and a clean file system will be created in the specified pid,
|
||||||
|
in the available space of the target. (Use --format=size_in_meg to limit
|
||||||
|
the total LUN space available)
|
||||||
|
|
||||||
|
If pid already exists, it will be deleted and a new one will be created in its
|
||||||
|
place. Be careful.
|
||||||
|
|
||||||
|
An exofs lives inside a single OSD partition. You can create multiple exofs
|
||||||
|
filesystems on the same device using multiple pids.
|
||||||
|
|
||||||
|
(run mkfs.exofs without any parameters for usage help message)
|
||||||
|
|
||||||
|
6. Mount the file system.
|
||||||
|
|
||||||
|
For example, to mount /dev/osd0, partition ID 0x10000 on /mnt/exofs:
|
||||||
|
|
||||||
|
mount -t exofs -o pid=65536 /dev/osd0 /mnt/exofs/
|
||||||
|
|
||||||
|
7. For reference (See do-exofs example script):
|
||||||
|
do-exofs start - an example of how to perform the above steps.
|
||||||
|
do-exofs stop - an example of how to unmount the file system.
|
||||||
|
do-exofs format - an example of how to format and mkfs a new exofs.
|
||||||
|
|
||||||
|
8. Extra compilation flags (uncomment in fs/exofs/Kbuild):
|
||||||
|
CONFIG_EXOFS_DEBUG - for debug messages and extra checks.
|
||||||
|
|
||||||
|
===============================================================================
|
||||||
|
exofs mount options
|
||||||
|
===============================================================================
|
||||||
|
Similar to any mount command:
|
||||||
|
mount -t exofs -o exofs_options /dev/osdX mount_exofs_directory
|
||||||
|
|
||||||
|
Where:
|
||||||
|
-t exofs: specifies the exofs file system
|
||||||
|
|
||||||
|
/dev/osdX: X is a decimal number. /dev/osdX was created after a successful
|
||||||
|
login into an OSD target.
|
||||||
|
|
||||||
|
mount_exofs_directory: The directory to mount the file system on
|
||||||
|
|
||||||
|
exofs specific options: Options are separated by commas (,)
|
||||||
|
pid=<integer> - The partition number to mount/create as
|
||||||
|
container of the filesystem.
|
||||||
|
This option is mandatory
|
||||||
|
to=<integer> - Timeout in ticks for a single command
|
||||||
|
default is (60 * HZ) [for debugging only]
|
||||||
|
|
||||||
|
===============================================================================
|
||||||
|
DESIGN
|
||||||
|
===============================================================================
|
||||||
|
|
||||||
|
* The file system control block (AKA on-disk superblock) resides in an object
|
||||||
|
with a special ID (defined in common.h).
|
||||||
|
Information included in the file system control block is used to fill the
|
||||||
|
in-memory superblock structure at mount time. This object is created before
|
||||||
|
the file system is used by mkexofs.c. It contains information such as:
|
||||||
|
- The file system's magic number
|
||||||
|
- The next inode number to be allocated
|
||||||
|
|
||||||
|
* Each file resides in its own object and contains the data (and it will be
|
||||||
|
possible to extend the file over multiple objects, though this has not been
|
||||||
|
implemented yet).
|
||||||
|
|
||||||
|
* A directory is treated as a file, and essentially contains a list of <file
|
||||||
|
name, inode #> pairs for files that are found in that directory. The object
|
||||||
|
IDs correspond to the files' inode numbers and will be allocated according to
|
||||||
|
a bitmap (stored in a separate object). Now they are allocated using a
|
||||||
|
counter.
|
||||||
|
|
||||||
|
* Each file's control block (AKA on-disk inode) is stored in its object's
|
||||||
|
attributes. This applies to both regular files and other types (directories,
|
||||||
|
device files, symlinks, etc.).
|
||||||
|
|
||||||
|
* Credentials are generated per object (inode and superblock) when they are
|
||||||
|
created in memory (read off disk or created). The credential works for all
|
||||||
|
operations and is used as long as the object remains in memory.
|
||||||
|
|
||||||
|
* Async OSD operations are used whenever possible, but the target may execute
|
||||||
|
them out of order. The operations that concern us are create, delete,
|
||||||
|
readpage, writepage, update_inode, and truncate. The following pairs of
|
||||||
|
operations should execute in the order written, and we need to prevent them
|
||||||
|
from executing in reverse order:
|
||||||
|
- The following are handled with the OBJ_CREATED and OBJ_2BCREATED
|
||||||
|
flags. OBJ_CREATED is set when we know the object exists on the OSD -
|
||||||
|
in create's callback function, and when we successfully do a read_inode.
|
||||||
|
OBJ_2BCREATED is set in the beginning of the create function, so we
|
||||||
|
know that we should wait.
|
||||||
|
- create/delete: delete should wait until the object is created
|
||||||
|
on the OSD.
|
||||||
|
- create/readpage: readpage should be able to return a page
|
||||||
|
full of zeroes in this case. If there was a write already
|
||||||
|
en-route (i.e. create, writepage, readpage) then the page
|
||||||
|
would be locked, and so it would really be the same as
|
||||||
|
create/writepage.
|
||||||
|
- create/writepage: if writepage is called for a sync write, it
|
||||||
|
should wait until the object is created on the OSD.
|
||||||
|
Otherwise, it should just return.
|
||||||
|
- create/truncate: truncate should wait until the object is
|
||||||
|
created on the OSD.
|
||||||
|
- create/update_inode: update_inode should wait until the
|
||||||
|
object is created on the OSD.
|
||||||
|
- Handled by VFS locks:
|
||||||
|
- readpage/delete: shouldn't happen because of page lock.
|
||||||
|
- writepage/delete: shouldn't happen because of page lock.
|
||||||
|
- readpage/writepage: shouldn't happen because of page lock.
|
||||||
|
|
||||||
|
===============================================================================
|
||||||
|
LICENSE/COPYRIGHT
|
||||||
|
===============================================================================
|
||||||
|
The exofs file system is based on ext2 v0.5b (distributed with the Linux kernel
|
||||||
|
version 2.6.10). All files include the original copyrights, and the license
|
||||||
|
is GPL version 2 (only version 2, as is true for the Linux kernel). The
|
||||||
|
Linux kernel can be downloaded from www.kernel.org.
|
|
@ -14,6 +14,11 @@ Options
|
||||||
When mounting an ext3 filesystem, the following option are accepted:
|
When mounting an ext3 filesystem, the following option are accepted:
|
||||||
(*) == default
|
(*) == default
|
||||||
|
|
||||||
|
ro Mount filesystem read only. Note that ext3 will replay
|
||||||
|
the journal (and thus write to the partition) even when
|
||||||
|
mounted "read only". Mount options "ro,noload" can be
|
||||||
|
used to prevent writes to the filesystem.
|
||||||
|
|
||||||
journal=update Update the ext3 file system's journal to the current
|
journal=update Update the ext3 file system's journal to the current
|
||||||
format.
|
format.
|
||||||
|
|
||||||
|
@ -27,7 +32,9 @@ journal_dev=devnum When the external journal device's major/minor numbers
|
||||||
identified through its new major/minor numbers encoded
|
identified through its new major/minor numbers encoded
|
||||||
in devnum.
|
in devnum.
|
||||||
|
|
||||||
noload Don't load the journal on mounting.
|
noload Don't load the journal on mounting. Note that this forces
|
||||||
|
mount of inconsistent filesystem, which can lead to
|
||||||
|
various problems.
|
||||||
|
|
||||||
data=journal All data are committed into the journal prior to being
|
data=journal All data are committed into the journal prior to being
|
||||||
written into the main file system.
|
written into the main file system.
|
||||||
|
@ -92,9 +99,12 @@ nocheck
|
||||||
|
|
||||||
debug Extra debugging information is sent to syslog.
|
debug Extra debugging information is sent to syslog.
|
||||||
|
|
||||||
errors=remount-ro(*) Remount the filesystem read-only on an error.
|
errors=remount-ro Remount the filesystem read-only on an error.
|
||||||
errors=continue Keep going on a filesystem error.
|
errors=continue Keep going on a filesystem error.
|
||||||
errors=panic Panic and halt the machine if an error occurs.
|
errors=panic Panic and halt the machine if an error occurs.
|
||||||
|
(These mount options override the errors behavior
|
||||||
|
specified in the superblock, which can be
|
||||||
|
configured using tune2fs.)
|
||||||
|
|
||||||
data_err=ignore(*) Just print an error message if an error occurs
|
data_err=ignore(*) Just print an error message if an error occurs
|
||||||
in a file data buffer in ordered mode.
|
in a file data buffer in ordered mode.
|
||||||
|
|
159
Documentation/filesystems/knfsd-stats.txt
Normal file
159
Documentation/filesystems/knfsd-stats.txt
Normal file
|
@ -0,0 +1,159 @@
|
||||||
|
|
||||||
|
Kernel NFS Server Statistics
|
||||||
|
============================
|
||||||
|
|
||||||
|
This document describes the format and semantics of the statistics
|
||||||
|
which the kernel NFS server makes available to userspace. These
|
||||||
|
statistics are available in several text form pseudo files, each of
|
||||||
|
which is described separately below.
|
||||||
|
|
||||||
|
In most cases you don't need to know these formats, as the nfsstat(8)
|
||||||
|
program from the nfs-utils distribution provides a helpful command-line
|
||||||
|
interface for extracting and printing them.
|
||||||
|
|
||||||
|
All the files described here are formatted as a sequence of text lines,
|
||||||
|
separated by newline '\n' characters. Lines beginning with a hash
|
||||||
|
'#' character are comments intended for humans and should be ignored
|
||||||
|
by parsing routines. All other lines contain a sequence of fields
|
||||||
|
separated by whitespace.
|
||||||
|
|
||||||
|
/proc/fs/nfsd/pool_stats
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
This file is available in kernels from 2.6.30 onwards, if the
|
||||||
|
/proc/fs/nfsd filesystem is mounted (it almost always should be).
|
||||||
|
|
||||||
|
The first line is a comment which describes the fields present in
|
||||||
|
all the other lines. The other lines present the following data as
|
||||||
|
a sequence of unsigned decimal numeric fields. One line is shown
|
||||||
|
for each NFS thread pool.
|
||||||
|
|
||||||
|
All counters are 64 bits wide and wrap naturally. There is no way
|
||||||
|
to zero these counters; instead, applications should do their own
|
||||||
|
rate conversion.
|
||||||
|
|
||||||
|
pool
|
||||||
|
The id number of the NFS thread pool to which this line applies.
|
||||||
|
This number does not change.
|
||||||
|
|
||||||
|
Thread pool ids are a contiguous set of small integers starting
|
||||||
|
at zero. The maximum value depends on the thread pool mode, but
|
||||||
|
currently cannot be larger than the number of CPUs in the system.
|
||||||
|
Note that in the default case there will be a single thread pool
|
||||||
|
which contains all the nfsd threads and all the CPUs in the system,
|
||||||
|
and thus this file will have a single line with a pool id of "0".
|
||||||
|
|
||||||
|
packets-arrived
|
||||||
|
Counts how many NFS packets have arrived. More precisely, this
|
||||||
|
is the number of times that the network stack has notified the
|
||||||
|
sunrpc server layer that new data may be available on a transport
|
||||||
|
(e.g. a TCP or UDP socket or an NFS/RDMA endpoint).
|
||||||
|
|
||||||
|
Depending on the NFS workload patterns and various network stack
|
||||||
|
effects (such as Large Receive Offload) which can combine packets
|
||||||
|
on the wire, this may be either more or less than the number
|
||||||
|
of NFS calls received (which statistic is available elsewhere).
|
||||||
|
However this is a more accurate and less workload-dependent measure
|
||||||
|
of how much CPU load is being placed on the sunrpc server layer
|
||||||
|
due to NFS network traffic.
|
||||||
|
|
||||||
|
sockets-enqueued
|
||||||
|
Counts how many times an NFS transport is enqueued to wait for
|
||||||
|
an nfsd thread to service it, i.e. no nfsd thread was considered
|
||||||
|
available.
|
||||||
|
|
||||||
|
The circumstance this statistic tracks indicates that there was NFS
|
||||||
|
network-facing work to be done but it couldn't be done immediately,
|
||||||
|
thus introducing a small delay in servicing NFS calls. The ideal
|
||||||
|
rate of change for this counter is zero; significantly non-zero
|
||||||
|
values may indicate a performance limitation.
|
||||||
|
|
||||||
|
This can happen either because there are too few nfsd threads in the
|
||||||
|
thread pool for the NFS workload (the workload is thread-limited),
|
||||||
|
or because the NFS workload needs more CPU time than is available in
|
||||||
|
the thread pool (the workload is CPU-limited). In the former case,
|
||||||
|
configuring more nfsd threads will probably improve the performance
|
||||||
|
of the NFS workload. In the latter case, the sunrpc server layer is
|
||||||
|
already choosing not to wake idle nfsd threads because there are too
|
||||||
|
many nfsd threads which want to run but cannot, so configuring more
|
||||||
|
nfsd threads will make no difference whatsoever. The overloads-avoided
|
||||||
|
statistic (see below) can be used to distinguish these cases.
|
||||||
|
|
||||||
|
threads-woken
|
||||||
|
Counts how many times an idle nfsd thread is woken to try to
|
||||||
|
receive some data from an NFS transport.
|
||||||
|
|
||||||
|
This statistic tracks the circumstance where incoming
|
||||||
|
network-facing NFS work is being handled quickly, which is a good
|
||||||
|
thing. The ideal rate of change for this counter will be close
|
||||||
|
to but less than the rate of change of the packets-arrived counter.
|
||||||
|
|
||||||
|
overloads-avoided
|
||||||
|
Counts how many times the sunrpc server layer chose not to wake an
|
||||||
|
nfsd thread, despite the presence of idle nfsd threads, because
|
||||||
|
too many nfsd threads had been recently woken but could not get
|
||||||
|
enough CPU time to actually run.
|
||||||
|
|
||||||
|
This statistic counts a circumstance where the sunrpc layer
|
||||||
|
heuristically avoids overloading the CPU scheduler with too many
|
||||||
|
runnable nfsd threads. The ideal rate of change for this counter
|
||||||
|
is zero. Significant non-zero values indicate that the workload
|
||||||
|
is CPU limited. Usually this is associated with heavy CPU usage
|
||||||
|
on all the CPUs in the nfsd thread pool.
|
||||||
|
|
||||||
|
If a sustained large overloads-avoided rate is detected on a pool,
|
||||||
|
the top(1) utility should be used to check for the following
|
||||||
|
pattern of CPU usage on all the CPUs associated with the given
|
||||||
|
nfsd thread pool.
|
||||||
|
|
||||||
|
- %us ~= 0 (as you're *NOT* running applications on your NFS server)
|
||||||
|
|
||||||
|
- %wa ~= 0
|
||||||
|
|
||||||
|
- %id ~= 0
|
||||||
|
|
||||||
|
- %sy + %hi + %si ~= 100
|
||||||
|
|
||||||
|
If this pattern is seen, configuring more nfsd threads will *not*
|
||||||
|
improve the performance of the workload. If this pattern is not
|
||||||
|
seen, then something more subtle is wrong.
|
||||||
|
|
||||||
|
threads-timedout
|
||||||
|
Counts how many times an nfsd thread triggered an idle timeout,
|
||||||
|
i.e. was not woken to handle any incoming network packets for
|
||||||
|
some time.
|
||||||
|
|
||||||
|
This statistic counts a circumstance where there are more nfsd
|
||||||
|
threads configured than can be used by the NFS workload. This is
|
||||||
|
a clue that the number of nfsd threads can be reduced without
|
||||||
|
affecting performance. Unfortunately, it's only a clue and not
|
||||||
|
a strong indication, for a couple of reasons:
|
||||||
|
|
||||||
|
- Currently the rate at which the counter is incremented is quite
|
||||||
|
slow; the idle timeout is 60 minutes. Unless the NFS workload
|
||||||
|
remains constant for hours at a time, this counter is unlikely
|
||||||
|
to be providing information that is still useful.
|
||||||
|
|
||||||
|
- It is usually a wise policy to provide some slack,
|
||||||
|
i.e. configure a few more nfsds than are currently needed,
|
||||||
|
to allow for future spikes in load.
|
||||||
|
|
||||||
|
|
||||||
|
Note that incoming packets on NFS transports will be dealt with in
|
||||||
|
one of three ways. An nfsd thread can be woken (threads-woken counts
|
||||||
|
this case), or the transport can be enqueued for later attention
|
||||||
|
(sockets-enqueued counts this case), or the packet can be temporarily
|
||||||
|
deferred because the transport is currently being used by an nfsd
|
||||||
|
thread. This last case is not very interesting and is not explicitly
|
||||||
|
counted, but can be inferred from the other counters thus:
|
||||||
|
|
||||||
|
packets-deferred = packets-arrived - ( sockets-enqueued + threads-woken )
|
||||||
|
|
||||||
|
|
||||||
|
More
|
||||||
|
----
|
||||||
|
Descriptions of the other statistics files should go here.
|
||||||
|
|
||||||
|
|
||||||
|
Greg Banks <gnb@sgi.com>
|
||||||
|
26 Mar 2009
|
161
Documentation/filesystems/nfs41-server.txt
Normal file
161
Documentation/filesystems/nfs41-server.txt
Normal file
|
@ -0,0 +1,161 @@
|
||||||
|
NFSv4.1 Server Implementation
|
||||||
|
|
||||||
|
Server support for minorversion 1 can be controlled using the
|
||||||
|
/proc/fs/nfsd/versions control file. The string output returned
|
||||||
|
by reading this file will contain either "+4.1" or "-4.1"
|
||||||
|
correspondingly.
|
||||||
|
|
||||||
|
Currently, server support for minorversion 1 is disabled by default.
|
||||||
|
It can be enabled at run time by writing the string "+4.1" to
|
||||||
|
the /proc/fs/nfsd/versions control file. Note that to write this
|
||||||
|
control file, the nfsd service must be taken down. Use your user-mode
|
||||||
|
nfs-utils to set this up; see rpc.nfsd(8)
|
||||||
|
|
||||||
|
The NFSv4 minorversion 1 (NFSv4.1) implementation in nfsd is based
|
||||||
|
on the latest NFSv4.1 Internet Draft:
|
||||||
|
http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion1-29
|
||||||
|
|
||||||
|
From the many new features in NFSv4.1 the current implementation
|
||||||
|
focuses on the mandatory-to-implement NFSv4.1 Sessions, providing
|
||||||
|
"exactly once" semantics and better control and throttling of the
|
||||||
|
resources allocated for each client.
|
||||||
|
|
||||||
|
Other NFSv4.1 features, Parallel NFS operations in particular,
|
||||||
|
are still under development out of tree.
|
||||||
|
See http://wiki.linux-nfs.org/wiki/index.php/PNFS_prototype_design
|
||||||
|
for more information.
|
||||||
|
|
||||||
|
The table below, taken from the NFSv4.1 document, lists
|
||||||
|
the operations that are mandatory to implement (REQ), optional
|
||||||
|
(OPT), and NFSv4.0 operations that MUST NOT be implemented (MNI)
|
||||||
|
in minor version 1. The first column indicates the operations that
|
||||||
|
are not supported yet by the linux server implementation.
|
||||||
|
|
||||||
|
The OPTIONAL features identified and their abbreviations are as follows:
|
||||||
|
pNFS Parallel NFS
|
||||||
|
FDELG File Delegations
|
||||||
|
DDELG Directory Delegations
|
||||||
|
|
||||||
|
The following abbreviations indicate the linux server implementation status.
|
||||||
|
I Implemented NFSv4.1 operations.
|
||||||
|
NS Not Supported.
|
||||||
|
NS* unimplemented optional feature.
|
||||||
|
P pNFS features implemented out of tree.
|
||||||
|
PNS pNFS features that are not supported yet (out of tree).
|
||||||
|
|
||||||
|
Operations
|
||||||
|
|
||||||
|
+----------------------+------------+--------------+----------------+
|
||||||
|
| Operation | REQ, REC, | Feature | Definition |
|
||||||
|
| | OPT, or | (REQ, REC, | |
|
||||||
|
| | MNI | or OPT) | |
|
||||||
|
+----------------------+------------+--------------+----------------+
|
||||||
|
| ACCESS | REQ | | Section 18.1 |
|
||||||
|
NS | BACKCHANNEL_CTL | REQ | | Section 18.33 |
|
||||||
|
NS | BIND_CONN_TO_SESSION | REQ | | Section 18.34 |
|
||||||
|
| CLOSE | REQ | | Section 18.2 |
|
||||||
|
| COMMIT | REQ | | Section 18.3 |
|
||||||
|
| CREATE | REQ | | Section 18.4 |
|
||||||
|
I | CREATE_SESSION | REQ | | Section 18.36 |
|
||||||
|
NS*| DELEGPURGE | OPT | FDELG (REQ) | Section 18.5 |
|
||||||
|
| DELEGRETURN | OPT | FDELG, | Section 18.6 |
|
||||||
|
| | | DDELG, pNFS | |
|
||||||
|
| | | (REQ) | |
|
||||||
|
NS | DESTROY_CLIENTID | REQ | | Section 18.50 |
|
||||||
|
I | DESTROY_SESSION | REQ | | Section 18.37 |
|
||||||
|
I | EXCHANGE_ID | REQ | | Section 18.35 |
|
||||||
|
NS | FREE_STATEID | REQ | | Section 18.38 |
|
||||||
|
| GETATTR | REQ | | Section 18.7 |
|
||||||
|
P | GETDEVICEINFO | OPT | pNFS (REQ) | Section 18.40 |
|
||||||
|
P | GETDEVICELIST | OPT | pNFS (OPT) | Section 18.41 |
|
||||||
|
| GETFH | REQ | | Section 18.8 |
|
||||||
|
NS*| GET_DIR_DELEGATION | OPT | DDELG (REQ) | Section 18.39 |
|
||||||
|
P | LAYOUTCOMMIT | OPT | pNFS (REQ) | Section 18.42 |
|
||||||
|
P | LAYOUTGET | OPT | pNFS (REQ) | Section 18.43 |
|
||||||
|
P | LAYOUTRETURN | OPT | pNFS (REQ) | Section 18.44 |
|
||||||
|
| LINK | OPT | | Section 18.9 |
|
||||||
|
| LOCK | REQ | | Section 18.10 |
|
||||||
|
| LOCKT | REQ | | Section 18.11 |
|
||||||
|
| LOCKU | REQ | | Section 18.12 |
|
||||||
|
| LOOKUP | REQ | | Section 18.13 |
|
||||||
|
| LOOKUPP | REQ | | Section 18.14 |
|
||||||
|
| NVERIFY | REQ | | Section 18.15 |
|
||||||
|
| OPEN | REQ | | Section 18.16 |
|
||||||
|
NS*| OPENATTR | OPT | | Section 18.17 |
|
||||||
|
| OPEN_CONFIRM | MNI | | N/A |
|
||||||
|
| OPEN_DOWNGRADE | REQ | | Section 18.18 |
|
||||||
|
| PUTFH | REQ | | Section 18.19 |
|
||||||
|
| PUTPUBFH | REQ | | Section 18.20 |
|
||||||
|
| PUTROOTFH | REQ | | Section 18.21 |
|
||||||
|
| READ | REQ | | Section 18.22 |
|
||||||
|
| READDIR | REQ | | Section 18.23 |
|
||||||
|
| READLINK | OPT | | Section 18.24 |
|
||||||
|
NS | RECLAIM_COMPLETE | REQ | | Section 18.51 |
|
||||||
|
| RELEASE_LOCKOWNER | MNI | | N/A |
|
||||||
|
| REMOVE | REQ | | Section 18.25 |
|
||||||
|
| RENAME | REQ | | Section 18.26 |
|
||||||
|
| RENEW | MNI | | N/A |
|
||||||
|
| RESTOREFH | REQ | | Section 18.27 |
|
||||||
|
| SAVEFH | REQ | | Section 18.28 |
|
||||||
|
| SECINFO | REQ | | Section 18.29 |
|
||||||
|
NS | SECINFO_NO_NAME | REC | pNFS files | Section 18.45, |
|
||||||
|
| | | layout (REQ) | Section 13.12 |
|
||||||
|
I | SEQUENCE | REQ | | Section 18.46 |
|
||||||
|
| SETATTR | REQ | | Section 18.30 |
|
||||||
|
| SETCLIENTID | MNI | | N/A |
|
||||||
|
| SETCLIENTID_CONFIRM | MNI | | N/A |
|
||||||
|
NS | SET_SSV | REQ | | Section 18.47 |
|
||||||
|
NS | TEST_STATEID | REQ | | Section 18.48 |
|
||||||
|
| VERIFY | REQ | | Section 18.31 |
|
||||||
|
NS*| WANT_DELEGATION | OPT | FDELG (OPT) | Section 18.49 |
|
||||||
|
| WRITE | REQ | | Section 18.32 |
|
||||||
|
|
||||||
|
Callback Operations
|
||||||
|
|
||||||
|
+-------------------------+-----------+-------------+---------------+
|
||||||
|
| Operation | REQ, REC, | Feature | Definition |
|
||||||
|
| | OPT, or | (REQ, REC, | |
|
||||||
|
| | MNI | or OPT) | |
|
||||||
|
+-------------------------+-----------+-------------+---------------+
|
||||||
|
| CB_GETATTR | OPT | FDELG (REQ) | Section 20.1 |
|
||||||
|
P | CB_LAYOUTRECALL | OPT | pNFS (REQ) | Section 20.3 |
|
||||||
|
NS*| CB_NOTIFY | OPT | DDELG (REQ) | Section 20.4 |
|
||||||
|
P | CB_NOTIFY_DEVICEID | OPT | pNFS (OPT) | Section 20.12 |
|
||||||
|
NS*| CB_NOTIFY_LOCK | OPT | | Section 20.11 |
|
||||||
|
NS*| CB_PUSH_DELEG | OPT | FDELG (OPT) | Section 20.5 |
|
||||||
|
| CB_RECALL | OPT | FDELG, | Section 20.2 |
|
||||||
|
| | | DDELG, pNFS | |
|
||||||
|
| | | (REQ) | |
|
||||||
|
NS*| CB_RECALL_ANY | OPT | FDELG, | Section 20.6 |
|
||||||
|
| | | DDELG, pNFS | |
|
||||||
|
| | | (REQ) | |
|
||||||
|
NS | CB_RECALL_SLOT | REQ | | Section 20.8 |
|
||||||
|
NS*| CB_RECALLABLE_OBJ_AVAIL | OPT | DDELG, pNFS | Section 20.7 |
|
||||||
|
| | | (REQ) | |
|
||||||
|
I | CB_SEQUENCE | OPT | FDELG, | Section 20.9 |
|
||||||
|
| | | DDELG, pNFS | |
|
||||||
|
| | | (REQ) | |
|
||||||
|
NS*| CB_WANTS_CANCELLED | OPT | FDELG, | Section 20.10 |
|
||||||
|
| | | DDELG, pNFS | |
|
||||||
|
| | | (REQ) | |
|
||||||
|
+-------------------------+-----------+-------------+---------------+
|
||||||
|
|
||||||
|
Implementation notes:
|
||||||
|
|
||||||
|
EXCHANGE_ID:
|
||||||
|
* only SP4_NONE state protection supported
|
||||||
|
* implementation ids are ignored
|
||||||
|
|
||||||
|
CREATE_SESSION:
|
||||||
|
* backchannel attributes are ignored
|
||||||
|
* backchannel security parameters are ignored
|
||||||
|
|
||||||
|
SEQUENCE:
|
||||||
|
* no support for dynamic slot table renegotiation (optional)
|
||||||
|
|
||||||
|
nfsv4.1 COMPOUND rules:
|
||||||
|
The following cases aren't supported yet:
|
||||||
|
* Enforcing of NFS4ERR_NOT_ONLY_OP for: BIND_CONN_TO_SESSION, CREATE_SESSION,
|
||||||
|
DESTROY_CLIENTID, DESTROY_SESSION, EXCHANGE_ID.
|
||||||
|
* DESTROY_SESSION MUST be the final operation in the COMPOUND request.
|
||||||
|
|
200
Documentation/filesystems/nilfs2.txt
Normal file
200
Documentation/filesystems/nilfs2.txt
Normal file
|
@ -0,0 +1,200 @@
|
||||||
|
NILFS2
|
||||||
|
------
|
||||||
|
|
||||||
|
NILFS2 is a log-structured file system (LFS) supporting continuous
|
||||||
|
snapshotting. In addition to versioning capability of the entire file
|
||||||
|
system, users can even restore files mistakenly overwritten or
|
||||||
|
destroyed just a few seconds ago. Since NILFS2 can keep consistency
|
||||||
|
like conventional LFS, it achieves quick recovery after system
|
||||||
|
crashes.
|
||||||
|
|
||||||
|
NILFS2 creates a number of checkpoints every few seconds or per
|
||||||
|
synchronous write basis (unless there is no change). Users can select
|
||||||
|
significant versions among continuously created checkpoints, and can
|
||||||
|
change them into snapshots which will be preserved until they are
|
||||||
|
changed back to checkpoints.
|
||||||
|
|
||||||
|
There is no limit on the number of snapshots until the volume gets
|
||||||
|
full. Each snapshot is mountable as a read-only file system
|
||||||
|
concurrently with its writable mount, and this feature is convenient
|
||||||
|
for online backup.
|
||||||
|
|
||||||
|
The userland tools are included in nilfs-utils package, which is
|
||||||
|
available from the following download page. At least "mkfs.nilfs2",
|
||||||
|
"mount.nilfs2", "umount.nilfs2", and "nilfs_cleanerd" (so called
|
||||||
|
cleaner or garbage collector) are required. Details on the tools are
|
||||||
|
described in the man pages included in the package.
|
||||||
|
|
||||||
|
Project web page: http://www.nilfs.org/en/
|
||||||
|
Download page: http://www.nilfs.org/en/download.html
|
||||||
|
Git tree web page: http://www.nilfs.org/git/
|
||||||
|
NILFS mailing lists: http://www.nilfs.org/mailman/listinfo/users
|
||||||
|
|
||||||
|
Caveats
|
||||||
|
=======
|
||||||
|
|
||||||
|
Features which NILFS2 does not support yet:
|
||||||
|
|
||||||
|
- atime
|
||||||
|
- extended attributes
|
||||||
|
- POSIX ACLs
|
||||||
|
- quotas
|
||||||
|
- writable snapshots
|
||||||
|
- remote backup (CDP)
|
||||||
|
- data integrity
|
||||||
|
- defragmentation
|
||||||
|
|
||||||
|
Mount options
|
||||||
|
=============
|
||||||
|
|
||||||
|
NILFS2 supports the following mount options:
|
||||||
|
(*) == default
|
||||||
|
|
||||||
|
barrier=on(*) This enables/disables barriers. barrier=off disables
|
||||||
|
it, barrier=on enables it.
|
||||||
|
errors=continue(*) Keep going on a filesystem error.
|
||||||
|
errors=remount-ro Remount the filesystem read-only on an error.
|
||||||
|
errors=panic Panic and halt the machine if an error occurs.
|
||||||
|
cp=n Specify the checkpoint-number of the snapshot to be
|
||||||
|
mounted. Checkpoints and snapshots are listed by lscp
|
||||||
|
user command. Only the checkpoints marked as snapshot
|
||||||
|
are mountable with this option. Snapshot is read-only,
|
||||||
|
so a read-only mount option must be specified together.
|
||||||
|
order=relaxed(*) Apply relaxed order semantics that allows modified data
|
||||||
|
blocks to be written to disk without making a
|
||||||
|
checkpoint if no metadata update is going. This mode
|
||||||
|
is equivalent to the ordered data mode of the ext3
|
||||||
|
filesystem except for the updates on data blocks still
|
||||||
|
conserve atomicity. This will improve synchronous
|
||||||
|
write performance for overwriting.
|
||||||
|
order=strict Apply strict in-order semantics that preserves sequence
|
||||||
|
of all file operations including overwriting of data
|
||||||
|
blocks. That means, it is guaranteed that no
|
||||||
|
overtaking of events occurs in the recovered file
|
||||||
|
system after a crash.
|
||||||
|
|
||||||
|
NILFS2 usage
|
||||||
|
============
|
||||||
|
|
||||||
|
To use nilfs2 as a local file system, simply:
|
||||||
|
|
||||||
|
# mkfs -t nilfs2 /dev/block_device
|
||||||
|
# mount -t nilfs2 /dev/block_device /dir
|
||||||
|
|
||||||
|
This will also invoke the cleaner through the mount helper program
|
||||||
|
(mount.nilfs2).
|
||||||
|
|
||||||
|
Checkpoints and snapshots are managed by the following commands.
|
||||||
|
Their manpages are included in the nilfs-utils package above.
|
||||||
|
|
||||||
|
lscp list checkpoints or snapshots.
|
||||||
|
mkcp make a checkpoint or a snapshot.
|
||||||
|
chcp change an existing checkpoint to a snapshot or vice versa.
|
||||||
|
rmcp invalidate specified checkpoint(s).
|
||||||
|
|
||||||
|
To mount a snapshot,
|
||||||
|
|
||||||
|
# mount -t nilfs2 -r -o cp=<cno> /dev/block_device /snap_dir
|
||||||
|
|
||||||
|
where <cno> is the checkpoint number of the snapshot.
|
||||||
|
|
||||||
|
To unmount the NILFS2 mount point or snapshot, simply:
|
||||||
|
|
||||||
|
# umount /dir
|
||||||
|
|
||||||
|
Then, the cleaner daemon is automatically shut down by the umount
|
||||||
|
helper program (umount.nilfs2).
|
||||||
|
|
||||||
|
Disk format
|
||||||
|
===========
|
||||||
|
|
||||||
|
A nilfs2 volume is equally divided into a number of segments except
|
||||||
|
for the super block (SB) and segment #0. A segment is the container
|
||||||
|
of logs. Each log is composed of summary information blocks, payload
|
||||||
|
blocks, and an optional super root block (SR):
|
||||||
|
|
||||||
|
______________________________________________________
|
||||||
|
| |SB| | Segment | Segment | Segment | ... | Segment | |
|
||||||
|
|_|__|_|____0____|____1____|____2____|_____|____N____|_|
|
||||||
|
0 +1K +4K +8M +16M +24M +(8MB x N)
|
||||||
|
. . (Typical offsets for 4KB-block)
|
||||||
|
. .
|
||||||
|
.______________________.
|
||||||
|
| log | log |... | log |
|
||||||
|
|__1__|__2__|____|__m__|
|
||||||
|
. .
|
||||||
|
. .
|
||||||
|
. .
|
||||||
|
.______________________________.
|
||||||
|
| Summary | Payload blocks |SR|
|
||||||
|
|_blocks__|_________________|__|
|
||||||
|
|
||||||
|
The payload blocks are organized per file, and each file consists of
|
||||||
|
data blocks and B-tree node blocks:
|
||||||
|
|
||||||
|
|<--- File-A --->|<--- File-B --->|
|
||||||
|
_______________________________________________________________
|
||||||
|
| Data blocks | B-tree blocks | Data blocks | B-tree blocks | ...
|
||||||
|
_|_____________|_______________|_____________|_______________|_
|
||||||
|
|
||||||
|
|
||||||
|
Since only the modified blocks are written in the log, it may have
|
||||||
|
files without data blocks or B-tree node blocks.
|
||||||
|
|
||||||
|
The organization of the blocks is recorded in the summary information
|
||||||
|
blocks, which contains a header structure (nilfs_segment_summary), per
|
||||||
|
file structures (nilfs_finfo), and per block structures (nilfs_binfo):
|
||||||
|
|
||||||
|
_________________________________________________________________________
|
||||||
|
| Summary | finfo | binfo | ... | binfo | finfo | binfo | ... | binfo |...
|
||||||
|
|_blocks__|___A___|_(A,1)_|_____|(A,Na)_|___B___|_(B,1)_|_____|(B,Nb)_|___
|
||||||
|
|
||||||
|
|
||||||
|
The logs include regular files, directory files, symbolic link files
|
||||||
|
and several meta data files. The meta data files are the files used
|
||||||
|
to maintain file system meta data. The current version of NILFS2 uses
|
||||||
|
the following meta data files:
|
||||||
|
|
||||||
|
1) Inode file (ifile) -- Stores on-disk inodes
|
||||||
|
2) Checkpoint file (cpfile) -- Stores checkpoints
|
||||||
|
3) Segment usage file (sufile) -- Stores allocation state of segments
|
||||||
|
4) Data address translation file -- Maps virtual block numbers to usual
|
||||||
|
(DAT) block numbers. This file serves to
|
||||||
|
make on-disk blocks relocatable.
|
||||||
|
|
||||||
|
The following figure shows a typical organization of the logs:
|
||||||
|
|
||||||
|
_________________________________________________________________________
|
||||||
|
| Summary | regular file | file | ... | ifile | cpfile | sufile | DAT |SR|
|
||||||
|
|_blocks__|_or_directory_|_______|_____|_______|________|________|_____|__|
|
||||||
|
|
||||||
|
|
||||||
|
To stride over segment boundaries, this sequence of files may be split
|
||||||
|
into multiple logs. The sequence of logs that should be treated as
|
||||||
|
logically one log, is delimited with flags marked in the segment
|
||||||
|
summary. The recovery code of nilfs2 looks at this boundary information
|
||||||
|
to ensure atomicity of updates.
|
||||||
|
|
||||||
|
The super root block is inserted for every checkpoint. It includes
|
||||||
|
three special inodes, inodes for the DAT, cpfile, and sufile. Inodes
|
||||||
|
of regular files, directories, symlinks and other special files, are
|
||||||
|
included in the ifile. The inode of ifile itself is included in the
|
||||||
|
corresponding checkpoint entry in the cpfile. Thus, the hierarchy
|
||||||
|
among NILFS2 files can be depicted as follows:
|
||||||
|
|
||||||
|
Super block (SB)
|
||||||
|
|
|
||||||
|
v
|
||||||
|
Super root block (the latest cno=xx)
|
||||||
|
|-- DAT
|
||||||
|
|-- sufile
|
||||||
|
`-- cpfile
|
||||||
|
|-- ifile (cno=c1)
|
||||||
|
|-- ifile (cno=c2) ---- file (ino=i1)
|
||||||
|
: : |-- file (ino=i2)
|
||||||
|
`-- ifile (cno=xx) |-- file (ino=i3)
|
||||||
|
: :
|
||||||
|
`-- file (ino=yy)
|
||||||
|
( regular file, directory, or symlink )
|
||||||
|
|
||||||
|
For detail on the format of each file, please see include/linux/nilfs2_fs.h.
|
70
Documentation/filesystems/pohmelfs/design_notes.txt
Normal file
70
Documentation/filesystems/pohmelfs/design_notes.txt
Normal file
|
@ -0,0 +1,70 @@
|
||||||
|
POHMELFS: Parallel Optimized Host Message Exchange Layered File System.
|
||||||
|
|
||||||
|
Evgeniy Polyakov <zbr@ioremap.net>
|
||||||
|
|
||||||
|
Homepage: http://www.ioremap.net/projects/pohmelfs
|
||||||
|
|
||||||
|
POHMELFS first began as a network filesystem with coherent local data and
|
||||||
|
metadata caches but is now evolving into a parallel distributed filesystem.
|
||||||
|
|
||||||
|
Main features of this FS include:
|
||||||
|
* Locally coherent cache for data and metadata with (potentially) byte-range locks.
|
||||||
|
Since all Linux filesystems lock the whole inode during writing, algorithm
|
||||||
|
is very simple and does not use byte-ranges, although they are sent in
|
||||||
|
locking messages.
|
||||||
|
* Completely async processing of all events except creation of hard and symbolic
|
||||||
|
links, and rename events.
|
||||||
|
Object creation and data reading and writing are processed asynchronously.
|
||||||
|
* Flexible object architecture optimized for network processing.
|
||||||
|
Ability to create long paths to objects and remove arbitrarily huge
|
||||||
|
directories with a single network command.
|
||||||
|
(like removing the whole kernel tree via a single network command).
|
||||||
|
* Very high performance.
|
||||||
|
* Fast and scalable multithreaded userspace server. Being in userspace it works
|
||||||
|
with any underlying filesystem and still is much faster than async in-kernel NFS one.
|
||||||
|
* Client is able to switch between different servers (if one goes down, client
|
||||||
|
automatically reconnects to second and so on).
|
||||||
|
* Transactions support. Full failover for all operations.
|
||||||
|
Resending transactions to different servers on timeout or error.
|
||||||
|
* Read request (data read, directory listing, lookup requests) balancing between multiple servers.
|
||||||
|
* Write requests are replicated to multiple servers and completed only when all of them are acked.
|
||||||
|
* Ability to add and/or remove servers from the working set at run-time.
|
||||||
|
* Strong authentication and possible data encryption in network channel.
|
||||||
|
* Extended attributes support.
|
||||||
|
|
||||||
|
POHMELFS is based on transactions, which are potentially long-standing objects that live
|
||||||
|
in the client's memory. Each transaction contains all the information needed to process a given
|
||||||
|
command (or set of commands, which is frequently used during data writing: single transactions
|
||||||
|
can contain creation and data writing commands). Transactions are committed by all the servers
|
||||||
|
to which they are sent and, in case of failures, are eventually resent or dropped with an error.
|
||||||
|
For example, reading will return an error if no servers are available.
|
||||||
|
|
||||||
|
POHMELFS uses an asynchronous approach to data processing. Courtesy of transactions, it is
|
||||||
|
possible to detach replies from requests and, if the command requires data to be received, the
|
||||||
|
caller sleeps waiting for it. Thus, it is possible to issue multiple read commands to different
|
||||||
|
servers and async threads will pick up replies in parallel, find appropriate transactions in the
|
||||||
|
system and put the data where it belongs (like the page or inode cache).
|
||||||
|
|
||||||
|
The main feature of POHMELFS is writeback data and the metadata cache.
|
||||||
|
Only a few non-performance critical operations use the write-through cache and
|
||||||
|
are synchronous: hard and symbolic link creation, and object rename. Creation,
|
||||||
|
removal of objects and data writing are asynchronous and are sent to
|
||||||
|
the server during system writeback. Only one writer at a time is allowed for any
|
||||||
|
given inode, which is guarded by an appropriate locking protocol.
|
||||||
|
Because of this feature, POHMELFS is extremely fast at metadata intensive
|
||||||
|
workloads and can fully utilize the bandwidth to the servers when doing bulk
|
||||||
|
data transfers.
|
||||||
|
|
||||||
|
POHMELFS clients operate with a working set of servers and are capable of balancing read-only
|
||||||
|
operations (like lookups or directory listings) between them.
|
||||||
|
Administrators can add or remove servers from the set at run-time via special commands (described
|
||||||
|
in Documentation/filesystems/pohmelfs/info.txt file). Writes are replicated to all servers.
|
||||||
|
|
||||||
|
POHMELFS is capable of full data channel encryption and/or strong crypto hashing.
|
||||||
|
One can select any kernel supported cipher, encryption mode, hash type and operation mode
|
||||||
|
(hmac or digest). It is also possible to use both or neither (default). Crypto configuration
|
||||||
|
is checked during mount time and, if the server does not support it, appropriate capabilities
|
||||||
|
will be disabled or mount will fail (if 'crypto_fail_unsupported' mount option is specified).
|
||||||
|
Crypto performance heavily depends on the number of crypto threads, which asynchronously perform
|
||||||
|
crypto operations and send the resulting data to server or submit it up the stack. This number
|
||||||
|
can be controlled via a mount option.
|
86
Documentation/filesystems/pohmelfs/info.txt
Normal file
86
Documentation/filesystems/pohmelfs/info.txt
Normal file
|
@ -0,0 +1,86 @@
|
||||||
|
POHMELFS usage information.
|
||||||
|
|
||||||
|
Mount options:
|
||||||
|
idx=%u
|
||||||
|
Each mountpoint is associated with a special index via this option.
|
||||||
|
Administrator can add or remove servers from the given index, so all mounts,
|
||||||
|
which were attached to it, are updated.
|
||||||
|
Default is 0.
|
||||||
|
|
||||||
|
trans_scan_timeout=%u
|
||||||
|
This timeout, expressed in milliseconds, specifies time to scan transaction
|
||||||
|
trees looking for stale requests, which have to be resent, or if number of
|
||||||
|
retries exceed specified limit, dropped with error.
|
||||||
|
Default is 5 seconds.
|
||||||
|
|
||||||
|
drop_scan_timeout=%u
|
||||||
|
Internal timeout, expressed in milliseconds, which specifies how frequently
|
||||||
|
inodes marked to be dropped are freed. It also specifies how frequently
|
||||||
|
the system checks that servers have to be added or removed from current working set.
|
||||||
|
Default is 1 second.
|
||||||
|
|
||||||
|
wait_on_page_timeout=%u
|
||||||
|
Number of milliseconds to wait for reply from remote server for data reading command.
|
||||||
|
If this timeout is exceeded, reading returns an error.
|
||||||
|
Default is 5 seconds.
|
||||||
|
|
||||||
|
trans_retries=%u
|
||||||
|
This is the number of times that a transaction will be resent to a server that did
|
||||||
|
not answer for the last @trans_scan_timeout milliseconds.
|
||||||
|
When the number of resends exceeds this limit, the transaction is completed with error.
|
||||||
|
Default is 5 resends.
|
||||||
|
|
||||||
|
crypto_thread_num=%u
|
||||||
|
Number of crypto processing threads. Threads are used both for RX and TX traffic.
|
||||||
|
Default is 2, or no threads if crypto operations are not supported.
|
||||||
|
|
||||||
|
trans_max_pages=%u
|
||||||
|
Maximum number of pages in a single transaction. This parameter also controls
|
||||||
|
the number of pages, allocated for crypto processing (each crypto thread has
|
||||||
|
pool of pages, the number of which is equal to 'trans_max_pages').
|
||||||
|
Default is 100 pages.
|
||||||
|
|
||||||
|
crypto_fail_unsupported
|
||||||
|
If specified, mount will fail if the server does not support requested crypto operations.
|
||||||
|
By default mount will disable non-matching crypto operations.
|
||||||
|
|
||||||
|
mcache_timeout=%u
|
||||||
|
Maximum number of milliseconds to wait for the mcache objects to be processed.
|
||||||
|
Mcache includes locks (given lock should be granted by server), attributes (they should be
|
||||||
|
fully received in the given timeframe).
|
||||||
|
Default is 5 seconds.
|
||||||
|
|
||||||
|
Usage examples.
|
||||||
|
|
||||||
|
Add (or remove if it already exists) server server1.net:1025 into the working set with index $idx
|
||||||
|
with appropriate hash algorithm and key file and cipher algorithm, mode and key file:
|
||||||
|
$cfg -a server1.net -p 1025 -i $idx -K $hash_key -k $cipher_key
|
||||||
|
|
||||||
|
Mount filesystem with given index $idx to /mnt mountpoint.
|
||||||
|
Client will connect to all servers specified in the working set via previous command:
|
||||||
|
mount -t pohmel -o idx=$idx q /mnt
|
||||||
|
|
||||||
|
One can add or remove servers from working set after mounting too.
|
||||||
|
|
||||||
|
|
||||||
|
Server installation.
|
||||||
|
|
||||||
|
Creating a server, which listens at port 1025 and 0.0.0.0 address.
|
||||||
|
Working root directory (note, that server chroots there, so you have to have appropriate permissions)
|
||||||
|
is set to /mnt, server will negotiate hash/cipher with client, in case client requested it, there
|
||||||
|
are appropriate key files.
|
||||||
|
Number of working threads is set to 10.
|
||||||
|
|
||||||
|
# ./fserver -a 0.0.0.0 -p 1025 -r /mnt -w 10 -K hash_key -k cipher_key
|
||||||
|
|
||||||
|
-A 6 - listen on ipv6 address. Default: Disabled.
|
||||||
|
-r root - path to root directory. Default: /tmp.
|
||||||
|
-a addr - listen address. Default: 0.0.0.0.
|
||||||
|
-p port - listen port. Default: 1025.
|
||||||
|
-w workers - number of workers per connected client. Default: 1.
|
||||||
|
-K file - hash key size. Default: none.
|
||||||
|
-k file - cipher key size. Default: none.
|
||||||
|
-h - this help.
|
||||||
|
|
||||||
|
Number of worker threads specifies how many workers will be created for each client.
|
||||||
|
Bulk single-client transfers usually are better handled with smaller number (like 1-3).
|
227
Documentation/filesystems/pohmelfs/network_protocol.txt
Normal file
227
Documentation/filesystems/pohmelfs/network_protocol.txt
Normal file
|
@ -0,0 +1,227 @@
|
||||||
|
POHMELFS network protocol.
|
||||||
|
|
||||||
|
Basic structure used in network communication is following command:
|
||||||
|
|
||||||
|
struct netfs_cmd
|
||||||
|
{
|
||||||
|
__u16 cmd; /* Command number */
|
||||||
|
__u16 csize; /* Attached crypto information size */
|
||||||
|
__u16 cpad; /* Attached padding size */
|
||||||
|
__u16 ext; /* External flags */
|
||||||
|
__u32 size; /* Size of the attached data */
|
||||||
|
__u32 trans; /* Transaction id */
|
||||||
|
__u64 id; /* Object ID to operate on. Used for feedback.*/
|
||||||
|
__u64 start; /* Start of the object. */
|
||||||
|
__u64 iv; /* IV sequence */
|
||||||
|
__u8 data[0];
|
||||||
|
};
|
||||||
|
|
||||||
|
Commands can be embedded into transaction command (which in turn has own command),
|
||||||
|
so one can extend protocol as needed without breaking backward compatibility as long
|
||||||
|
as old commands are supported. All string lengths include tail 0 byte.
|
||||||
|
|
||||||
|
All commands are transferred over the network in big-endian. CPU endianness is used at the end peers.
|
||||||
|
|
||||||
|
@cmd - command number, which specifies command to be processed. Following
|
||||||
|
commands are used currently:
|
||||||
|
|
||||||
|
NETFS_READDIR = 1, /* Read directory for given inode number */
|
||||||
|
NETFS_READ_PAGE, /* Read data page from the server */
|
||||||
|
NETFS_WRITE_PAGE, /* Write data page to the server */
|
||||||
|
NETFS_CREATE, /* Create directory entry */
|
||||||
|
NETFS_REMOVE, /* Remove directory entry */
|
||||||
|
NETFS_LOOKUP, /* Lookup single object */
|
||||||
|
NETFS_LINK, /* Create a link */
|
||||||
|
NETFS_TRANS, /* Transaction */
|
||||||
|
NETFS_OPEN, /* Open intent */
|
||||||
|
NETFS_INODE_INFO, /* Metadata cache coherency synchronization message */
|
||||||
|
NETFS_PAGE_CACHE, /* Page cache invalidation message */
|
||||||
|
NETFS_READ_PAGES, /* Read multiple contiguous pages in one go */
|
||||||
|
NETFS_RENAME, /* Rename object */
|
||||||
|
NETFS_CAPABILITIES, /* Capabilities of the client, for example supported crypto */
|
||||||
|
NETFS_LOCK, /* Distributed lock message */
|
||||||
|
NETFS_XATTR_SET, /* Set extended attribute */
|
||||||
|
NETFS_XATTR_GET, /* Get extended attribute */
|
||||||
|
|
||||||
|
@ext - external flags. Used by different commands to specify some extra arguments
|
||||||
|
like partial size of the embedded objects or creation flags.
|
||||||
|
|
||||||
|
@size - size of the attached data. For NETFS_READ_PAGE and NETFS_READ_PAGES no data is attached,
|
||||||
|
but size of the requested data is incorporated here. It does not include size of the command
|
||||||
|
header (struct netfs_cmd) itself.
|
||||||
|
|
||||||
|
@id - id of the object this command operates on. Each command can use it for own purpose.
|
||||||
|
|
||||||
|
@start - start of the object this command operates on. Each command can use it for own purpose.
|
||||||
|
|
||||||
|
@csize, @cpad - size and padding size of the (attached if needed) crypto information.
|
||||||
|
|
||||||
|
Command specifications.
|
||||||
|
|
||||||
|
@NETFS_READDIR
|
||||||
|
This command is used to sync content of the remote dir to the client.
|
||||||
|
|
||||||
|
@ext - length of the path to object.
|
||||||
|
@size - the same.
|
||||||
|
@id - local inode number of the directory to read.
|
||||||
|
@start - zero.
|
||||||
|
|
||||||
|
|
||||||
|
@NETFS_READ_PAGE
|
||||||
|
This command is used to read data from remote server.
|
||||||
|
Data size does not exceed local page cache size.
|
||||||
|
|
||||||
|
@id - inode number.
|
||||||
|
@start - first byte offset.
|
||||||
|
@size - number of bytes to read plus length of the path to object.
|
||||||
|
@ext - object path length.
|
||||||
|
|
||||||
|
|
||||||
|
@NETFS_CREATE
|
||||||
|
Used to create object.
|
||||||
|
It does not require that all directories on top of the object were
|
||||||
|
already created, it will create them automatically. Each object has
|
||||||
|
associated @netfs_path_entry data structure, which contains creation
|
||||||
|
mode (permissions and type) and length of the name as well as the name itself.
|
||||||
|
|
||||||
|
@start - 0
|
||||||
|
@size - size of the all data structures needed to create a path
|
||||||
|
@id - local inode number
|
||||||
|
@ext - 0
|
||||||
|
|
||||||
|
|
||||||
|
@NETFS_REMOVE
|
||||||
|
Used to remove object.
|
||||||
|
|
||||||
|
@ext - length of the path to object.
|
||||||
|
@size - the same.
|
||||||
|
@id - local inode number.
|
||||||
|
@start - zero.
|
||||||
|
|
||||||
|
|
||||||
|
@NETFS_LOOKUP
|
||||||
|
Lookup information about object on server.
|
||||||
|
|
||||||
|
@ext - length of the path to object.
|
||||||
|
@size - the same.
|
||||||
|
@id - local inode number of the directory to look object in.
|
||||||
|
@start - local inode number of the object to look at.
|
||||||
|
|
||||||
|
|
||||||
|
@NETFS_LINK
|
||||||
|
Create hard link or symlink.
|
||||||
|
Command is sent as "object_path|target_path".
|
||||||
|
|
||||||
|
@size - size of the above string.
|
||||||
|
@id - parent local inode number.
|
||||||
|
@start - 1 for symlink, 0 for hardlink.
|
||||||
|
@ext - size of the "object_path" above.
|
||||||
|
|
||||||
|
|
||||||
|
@NETFS_TRANS
|
||||||
|
Transaction header.
|
||||||
|
|
||||||
|
@size - incorporates all embedded command sizes including their header sizes.
|
||||||
|
@start - transaction generation number - unique id used to find transaction.
|
||||||
|
@ext - transaction flags. Unused at the moment.
|
||||||
|
@id - 0.
|
||||||
|
|
||||||
|
|
||||||
|
@NETFS_OPEN
|
||||||
|
Open intent for given transaction.
|
||||||
|
|
||||||
|
@id - local inode number.
|
||||||
|
@start - 0.
|
||||||
|
@size - path length to the object.
|
||||||
|
@ext - open flags (O_RDWR and so on).
|
||||||
|
|
||||||
|
|
||||||
|
@NETFS_INODE_INFO
|
||||||
|
Metadata update command.
|
||||||
|
It is sent to servers when attributes of the object are changed and received
|
||||||
|
when data or metadata were updated. It operates with the following structure:
|
||||||
|
|
||||||
|
struct netfs_inode_info
|
||||||
|
{
|
||||||
|
unsigned int mode;
|
||||||
|
unsigned int nlink;
|
||||||
|
unsigned int uid;
|
||||||
|
unsigned int gid;
|
||||||
|
unsigned int blocksize;
|
||||||
|
unsigned int padding;
|
||||||
|
__u64 ino;
|
||||||
|
__u64 blocks;
|
||||||
|
__u64 rdev;
|
||||||
|
__u64 size;
|
||||||
|
__u64 version;
|
||||||
|
};
|
||||||
|
|
||||||
|
It effectively mirrors stat(2) returned data.
|
||||||
|
|
||||||
|
|
||||||
|
@ext - path length to the object.
|
||||||
|
@size - the same plus size of the netfs_inode_info structure.
|
||||||
|
@id - local inode number.
|
||||||
|
@start - 0.
|
||||||
|
|
||||||
|
|
||||||
|
@NETFS_PAGE_CACHE
|
||||||
|
Command is only received by clients. It contains information about
|
||||||
|
page to be marked as not up-to-date.
|
||||||
|
|
||||||
|
@id - client's inode number.
|
||||||
|
@start - last byte of the page to be invalidated. If it is not equal to
|
||||||
|
current inode size, it will be vmtruncated().
|
||||||
|
@size - 0
|
||||||
|
@ext - 0
|
||||||
|
|
||||||
|
|
||||||
|
@NETFS_READ_PAGES
|
||||||
|
Used to read multiple contiguous pages in one go.
|
||||||
|
|
||||||
|
@start - first byte of the contiguous region to read.
|
||||||
|
@size - consists of two fields: lower 8 bits are used to represent page cache shift
|
||||||
|
used by client, another 3 bytes are used to get number of pages.
|
||||||
|
@id - local inode number.
|
||||||
|
@ext - path length to the object.
|
||||||
|
|
||||||
|
|
||||||
|
@NETFS_RENAME
|
||||||
|
Used to rename object.
|
||||||
|
Attached data is formed into following string: "old_path|new_path".
|
||||||
|
|
||||||
|
@id - local inode number.
|
||||||
|
@start - parent inode number.
|
||||||
|
@size - length of the above string.
|
||||||
|
@ext - length of the old path part.
|
||||||
|
|
||||||
|
|
||||||
|
@NETFS_CAPABILITIES
|
||||||
|
Used to exchange crypto capabilities with server.
|
||||||
|
If crypto capabilities are not supported by server, then client will disable it
|
||||||
|
or fail (if 'crypto_fail_unsupported' mount options was specified).
|
||||||
|
|
||||||
|
@id - superblock index. Used to specify crypto information for group of servers.
|
||||||
|
@size - size of the attached capabilities structure.
|
||||||
|
@start - 0.
|
||||||
|
@size - 0.
|
||||||
|
@scsize - 0.
|
||||||
|
|
||||||
|
@NETFS_LOCK
|
||||||
|
Used to send lock request/release messages. Although it sends byte range request
|
||||||
|
and is capable of flushing pages based on that, it is not used, since all Linux
|
||||||
|
filesystems lock the whole inode.
|
||||||
|
|
||||||
|
@id - lock generation number.
|
||||||
|
@start - start of the locked range.
|
||||||
|
@size - size of the locked range.
|
||||||
|
@ext - lock type: read/write. Not used actually. 15'th bit is used to determine,
|
||||||
|
if it is lock request (1) or release (0).
|
||||||
|
|
||||||
|
@NETFS_XATTR_SET
|
||||||
|
@NETFS_XATTR_GET
|
||||||
|
Used to set/get extended attributes for given inode.
|
||||||
|
@id - attribute generation number or xattr setting type
|
||||||
|
@start - size of the attribute (request or attached)
|
||||||
|
@size - name length, path len and data size for given attribute
|
||||||
|
@ext - path length for given object
|
File diff suppressed because it is too large
Load diff
|
@ -24,6 +24,8 @@ The following mount options are supported:
|
||||||
|
|
||||||
gid= Set the default group.
|
gid= Set the default group.
|
||||||
umask= Set the default umask.
|
umask= Set the default umask.
|
||||||
|
mode= Set the default file permissions.
|
||||||
|
dmode= Set the default directory permissions.
|
||||||
uid= Set the default user.
|
uid= Set the default user.
|
||||||
bs= Set the block size.
|
bs= Set the block size.
|
||||||
unhide Show otherwise hidden files.
|
unhide Show otherwise hidden files.
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -123,7 +123,10 @@ platform-specific implementation issue.
|
||||||
|
|
||||||
Using GPIOs
|
Using GPIOs
|
||||||
-----------
|
-----------
|
||||||
One of the first things to do with a GPIO, often in board setup code when
|
The first thing a system should do with a GPIO is allocate it, using
|
||||||
|
the gpio_request() call; see later.
|
||||||
|
|
||||||
|
One of the next things to do with a GPIO, often in board setup code when
|
||||||
setting up a platform_device using the GPIO, is mark its direction:
|
setting up a platform_device using the GPIO, is mark its direction:
|
||||||
|
|
||||||
/* set as input or output, returning 0 or negative errno */
|
/* set as input or output, returning 0 or negative errno */
|
||||||
|
@ -141,8 +144,8 @@ This helps avoid signal glitching during system startup.
|
||||||
|
|
||||||
For compatibility with legacy interfaces to GPIOs, setting the direction
|
For compatibility with legacy interfaces to GPIOs, setting the direction
|
||||||
of a GPIO implicitly requests that GPIO (see below) if it has not been
|
of a GPIO implicitly requests that GPIO (see below) if it has not been
|
||||||
requested already. That compatibility may be removed in the future;
|
requested already. That compatibility is being removed from the optional
|
||||||
explicitly requesting GPIOs is strongly preferred.
|
gpiolib framework.
|
||||||
|
|
||||||
Setting the direction can fail if the GPIO number is invalid, or when
|
Setting the direction can fail if the GPIO number is invalid, or when
|
||||||
that particular GPIO can't be used in that mode. It's generally a bad
|
that particular GPIO can't be used in that mode. It's generally a bad
|
||||||
|
@ -195,7 +198,7 @@ This requires sleeping, which can't be done from inside IRQ handlers.
|
||||||
|
|
||||||
Platforms that support this type of GPIO distinguish them from other GPIOs
|
Platforms that support this type of GPIO distinguish them from other GPIOs
|
||||||
by returning nonzero from this call (which requires a valid GPIO number,
|
by returning nonzero from this call (which requires a valid GPIO number,
|
||||||
either explicitly or implicitly requested):
|
which should have been previously allocated with gpio_request):
|
||||||
|
|
||||||
int gpio_cansleep(unsigned gpio);
|
int gpio_cansleep(unsigned gpio);
|
||||||
|
|
||||||
|
@ -212,10 +215,9 @@ for GPIOs that can't be accessed from IRQ handlers, these calls act the
|
||||||
same as the spinlock-safe calls.
|
same as the spinlock-safe calls.
|
||||||
|
|
||||||
|
|
||||||
Claiming and Releasing GPIOs (OPTIONAL)
|
Claiming and Releasing GPIOs
|
||||||
---------------------------------------
|
----------------------------
|
||||||
To help catch system configuration errors, two calls are defined.
|
To help catch system configuration errors, two calls are defined.
|
||||||
However, many platforms don't currently support this mechanism.
|
|
||||||
|
|
||||||
/* request GPIO, returning 0 or negative errno.
|
/* request GPIO, returning 0 or negative errno.
|
||||||
* non-null labels may be useful for diagnostics.
|
* non-null labels may be useful for diagnostics.
|
||||||
|
@ -244,13 +246,6 @@ Some platforms may also use knowledge about what GPIOs are active for
|
||||||
power management, such as by powering down unused chip sectors and, more
|
power management, such as by powering down unused chip sectors and, more
|
||||||
easily, gating off unused clocks.
|
easily, gating off unused clocks.
|
||||||
|
|
||||||
These two calls are optional because not all current Linux platforms
|
|
||||||
offer such functionality in their GPIO support; a valid implementation
|
|
||||||
could return success for all gpio_request() calls. Unlike the other calls,
|
|
||||||
the state they represent doesn't normally match anything from a hardware
|
|
||||||
register; it's just a software bitmap which clearly is not necessary for
|
|
||||||
correct operation of hardware or (bug free) drivers.
|
|
||||||
|
|
||||||
Note that requesting a GPIO does NOT cause it to be configured in any
|
Note that requesting a GPIO does NOT cause it to be configured in any
|
||||||
way; it just marks that GPIO as in use. Separate code must handle any
|
way; it just marks that GPIO as in use. Separate code must handle any
|
||||||
pin setup (e.g. controlling which pin the GPIO uses, pullup/pulldown).
|
pin setup (e.g. controlling which pin the GPIO uses, pullup/pulldown).
|
||||||
|
|
36
Documentation/hwmon/g760a
Normal file
36
Documentation/hwmon/g760a
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
Kernel driver g760a
|
||||||
|
===================
|
||||||
|
|
||||||
|
Supported chips:
|
||||||
|
* Global Mixed-mode Technology Inc. G760A
|
||||||
|
Prefix: 'g760a'
|
||||||
|
Datasheet: Publicly available at the GMT website
|
||||||
|
http://www.gmt.com.tw/datasheet/g760a.pdf
|
||||||
|
|
||||||
|
Author: Herbert Valerio Riedel <hvr@gnu.org>
|
||||||
|
|
||||||
|
Description
|
||||||
|
-----------
|
||||||
|
|
||||||
|
The GMT G760A Fan Speed PWM Controller is connected directly to a fan
|
||||||
|
and performs closed-loop control of the fan speed.
|
||||||
|
|
||||||
|
The fan speed is programmed by setting the period via 'pwm1' of two
|
||||||
|
consecutive speed pulses. The period is defined in terms of clock
|
||||||
|
cycle counts of an assumed 32kHz clock source.
|
||||||
|
|
||||||
|
Setting a period of 0 stops the fan; setting the period to 255 sets
|
||||||
|
the fan to maximum speed.
|
||||||
|
|
||||||
|
The measured fan rotation speed returned via 'fan1_input' is derived
|
||||||
|
from the measured speed pulse period by assuming again a 32kHz clock
|
||||||
|
source and a 2 pulse-per-revolution fan.
|
||||||
|
|
||||||
|
The 'alarms' file provides access to the two alarm bits provided by
|
||||||
|
the G760A chip's status register: Bit 0 is set when the actual fan
|
||||||
|
speed differs by more than 20% with respect to the programmed fan speed;
|
||||||
|
bit 1 is set when fan speed is below 1920 RPM.
|
||||||
|
|
||||||
|
The g760a driver will not update its values more frequently than every
|
||||||
|
other second; reading them more often will do no harm, but will return
|
||||||
|
'old' values.
|
|
@ -42,7 +42,7 @@ Note: For step 2, please make sure that host page size == TARGET_PAGE_SIZE of qe
|
||||||
hg clone http://xenbits.xensource.com/ext/efi-vfirmware.hg
|
hg clone http://xenbits.xensource.com/ext/efi-vfirmware.hg
|
||||||
you can get the firmware's binary in the directory of efi-vfirmware.hg/binaries.
|
you can get the firmware's binary in the directory of efi-vfirmware.hg/binaries.
|
||||||
|
|
||||||
(3) Rename the firware you owned to Flash.fd, and copy it to /usr/local/share/qemu
|
(3) Rename the firmware you owned to Flash.fd, and copy it to /usr/local/share/qemu
|
||||||
|
|
||||||
4. Boot up Linux or Windows guests:
|
4. Boot up Linux or Windows guests:
|
||||||
4.1 Create or install an image for guest boot. If you have xen experience, it should be easy.
|
4.1 Create or install an image for guest boot. If you have xen experience, it should be easy.
|
||||||
|
|
|
@ -61,24 +61,28 @@ GigaSet 307x Device Driver
|
||||||
---------------------
|
---------------------
|
||||||
2.1. Modules
|
2.1. Modules
|
||||||
-------
|
-------
|
||||||
To get the device working, you have to load the proper kernel module. You
|
For the devices to work, the proper kernel modules have to be loaded.
|
||||||
can do this using
|
This normally happens automatically when the system detects the USB
|
||||||
modprobe modulename
|
device (base, M105) or when the line discipline is attached (M101). It
|
||||||
where modulename is ser_gigaset (M101), usb_gigaset (M105), or
|
can also be triggered manually using the modprobe(8) command, for example
|
||||||
bas_gigaset (direct USB connection to the base).
|
for troubleshooting or to pass module parameters.
|
||||||
|
|
||||||
The module ser_gigaset provides a serial line discipline N_GIGASET_M101
|
The module ser_gigaset provides a serial line discipline N_GIGASET_M101
|
||||||
which drives the device through the regular serial line driver. To use it,
|
which drives the device through the regular serial line driver. It must
|
||||||
run the Gigaset M101 daemon "gigasetm101d" (also available from
|
be attached to the serial line to which the M101 is connected with the
|
||||||
http://sourceforge.net/projects/gigaset307x/) with the device file of the
|
ldattach(8) command (requires util-linux-ng release 2.14 or later), for
|
||||||
RS232 port to the M101 as an argument, for example:
|
example:
|
||||||
gigasetm101d /dev/ttyS1
|
ldattach GIGASET_M101 /dev/ttyS1
|
||||||
This will open the device file, set its line discipline to N_GIGASET_M101,
|
This will open the device file, attach the line discipline to it, and
|
||||||
and then sleep in the background, keeping the device open so that the
|
then sleep in the background, keeping the device open so that the line
|
||||||
line discipline remains active. To deactivate it, kill the daemon, for
|
discipline remains active. To deactivate it, kill the daemon, for example
|
||||||
example with
|
with
|
||||||
killall gigasetm101d
|
killall ldattach
|
||||||
before disconnecting the device.
|
before disconnecting the device. To have this happen automatically at
|
||||||
|
system startup/shutdown on an LSB compatible system, create and activate
|
||||||
|
an appropriate LSB startup script /etc/init.d/gigaset. (The init name
|
||||||
|
'gigaset' is officially assigned to this project by LANANA.)
|
||||||
|
Alternatively, just add the 'ldattach' command line to /etc/rc.local.
|
||||||
|
|
||||||
2.2. Device nodes for user space programs
|
2.2. Device nodes for user space programs
|
||||||
------------------------------------
|
------------------------------------
|
||||||
|
@ -194,10 +198,11 @@ GigaSet 307x Device Driver
|
||||||
operation (for wireless access to the base), but are needed for access
|
operation (for wireless access to the base), but are needed for access
|
||||||
to the M105's own configuration mode (registration to the base, baudrate
|
to the M105's own configuration mode (registration to the base, baudrate
|
||||||
and line format settings, device status queries) via the gigacontr
|
and line format settings, device status queries) via the gigacontr
|
||||||
utility. Their use is disabled in the driver by default for safety
|
utility. Their use is controlled by the kernel configuration option
|
||||||
reasons but can be enabled by setting the kernel configuration option
|
"Support for undocumented USB requests" (CONFIG_GIGASET_UNDOCREQ). If you
|
||||||
"Support for undocumented USB requests" (GIGASET_UNDOCREQ) to "Y" and
|
encounter error code -ENOTTY when trying to use some features of the
|
||||||
recompiling.
|
M105, try setting that option to "y" via 'make {x,menu}config' and
|
||||||
|
recompiling the driver.
|
||||||
|
|
||||||
|
|
||||||
3. Troubleshooting
|
3. Troubleshooting
|
||||||
|
@ -228,6 +233,13 @@ GigaSet 307x Device Driver
|
||||||
Solution:
|
Solution:
|
||||||
Select Unimodem mode for all DECT data adapters. (see section 2.4.)
|
Select Unimodem mode for all DECT data adapters. (see section 2.4.)
|
||||||
|
|
||||||
|
Problem:
|
||||||
|
You want to configure your USB DECT data adapter (M105) but gigacontr
|
||||||
|
reports an error: "/dev/ttyGU0: Inappropriate ioctl for device".
|
||||||
|
Solution:
|
||||||
|
Recompile the usb_gigaset driver with the kernel configuration option
|
||||||
|
CONFIG_GIGASET_UNDOCREQ set to 'y'. (see section 2.6.)
|
||||||
|
|
||||||
3.2. Telling the driver to provide more information
|
3.2. Telling the driver to provide more information
|
||||||
----------------------------------------------
|
----------------------------------------------
|
||||||
Building the driver with the "Gigaset debugging" kernel configuration
|
Building the driver with the "Gigaset debugging" kernel configuration
|
||||||
|
|
|
@ -50,6 +50,7 @@ parameter is applicable:
|
||||||
ISAPNP ISA PnP code is enabled.
|
ISAPNP ISA PnP code is enabled.
|
||||||
ISDN Appropriate ISDN support is enabled.
|
ISDN Appropriate ISDN support is enabled.
|
||||||
JOY Appropriate joystick support is enabled.
|
JOY Appropriate joystick support is enabled.
|
||||||
|
KMEMTRACE kmemtrace is enabled.
|
||||||
LIBATA Libata driver is enabled
|
LIBATA Libata driver is enabled
|
||||||
LP Printer support is enabled.
|
LP Printer support is enabled.
|
||||||
LOOP Loopback device support is enabled.
|
LOOP Loopback device support is enabled.
|
||||||
|
@ -152,60 +153,6 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
1,0: use 1st APIC table
|
1,0: use 1st APIC table
|
||||||
default: 0
|
default: 0
|
||||||
|
|
||||||
acpi_sleep= [HW,ACPI] Sleep options
|
|
||||||
Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig,
|
|
||||||
old_ordering, s4_nonvs }
|
|
||||||
See Documentation/power/video.txt for information on
|
|
||||||
s3_bios and s3_mode.
|
|
||||||
s3_beep is for debugging; it makes the PC's speaker beep
|
|
||||||
as soon as the kernel's real-mode entry point is called.
|
|
||||||
s4_nohwsig prevents ACPI hardware signature from being
|
|
||||||
used during resume from hibernation.
|
|
||||||
old_ordering causes the ACPI 1.0 ordering of the _PTS
|
|
||||||
control method, with respect to putting devices into
|
|
||||||
low power states, to be enforced (the ACPI 2.0 ordering
|
|
||||||
of _PTS is used by default).
|
|
||||||
s4_nonvs prevents the kernel from saving/restoring the
|
|
||||||
ACPI NVS memory during hibernation.
|
|
||||||
|
|
||||||
acpi_sci= [HW,ACPI] ACPI System Control Interrupt trigger mode
|
|
||||||
Format: { level | edge | high | low }
|
|
||||||
|
|
||||||
acpi_irq_balance [HW,ACPI]
|
|
||||||
ACPI will balance active IRQs
|
|
||||||
default in APIC mode
|
|
||||||
|
|
||||||
acpi_irq_nobalance [HW,ACPI]
|
|
||||||
ACPI will not move active IRQs (default)
|
|
||||||
default in PIC mode
|
|
||||||
|
|
||||||
acpi_irq_pci= [HW,ACPI] If irq_balance, clear listed IRQs for
|
|
||||||
use by PCI
|
|
||||||
Format: <irq>,<irq>...
|
|
||||||
|
|
||||||
acpi_irq_isa= [HW,ACPI] If irq_balance, mark listed IRQs used by ISA
|
|
||||||
Format: <irq>,<irq>...
|
|
||||||
|
|
||||||
acpi_no_auto_ssdt [HW,ACPI] Disable automatic loading of SSDT
|
|
||||||
|
|
||||||
acpi_os_name= [HW,ACPI] Tell ACPI BIOS the name of the OS
|
|
||||||
Format: To spoof as Windows 98: ="Microsoft Windows"
|
|
||||||
|
|
||||||
acpi_osi= [HW,ACPI] Modify list of supported OS interface strings
|
|
||||||
acpi_osi="string1" # add string1 -- only one string
|
|
||||||
acpi_osi="!string2" # remove built-in string2
|
|
||||||
acpi_osi= # disable all strings
|
|
||||||
|
|
||||||
acpi_serialize [HW,ACPI] force serialization of AML methods
|
|
||||||
|
|
||||||
acpi_skip_timer_override [HW,ACPI]
|
|
||||||
Recognize and ignore IRQ0/pin2 Interrupt Override.
|
|
||||||
For broken nForce2 BIOS resulting in XT-PIC timer.
|
|
||||||
acpi_use_timer_override [HW,ACPI]
|
|
||||||
Use timer override. For some broken Nvidia NF5 boards
|
|
||||||
that require a timer override, but don't have
|
|
||||||
HPET
|
|
||||||
|
|
||||||
acpi_backlight= [HW,ACPI]
|
acpi_backlight= [HW,ACPI]
|
||||||
acpi_backlight=vendor
|
acpi_backlight=vendor
|
||||||
acpi_backlight=video
|
acpi_backlight=video
|
||||||
|
@ -213,11 +160,6 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
(e.g. thinkpad_acpi, sony_acpi, etc.) instead
|
(e.g. thinkpad_acpi, sony_acpi, etc.) instead
|
||||||
of the ACPI video.ko driver.
|
of the ACPI video.ko driver.
|
||||||
|
|
||||||
acpi_display_output= [HW,ACPI]
|
|
||||||
acpi_display_output=vendor
|
|
||||||
acpi_display_output=video
|
|
||||||
See above.
|
|
||||||
|
|
||||||
acpi.debug_layer= [HW,ACPI,ACPI_DEBUG]
|
acpi.debug_layer= [HW,ACPI,ACPI_DEBUG]
|
||||||
acpi.debug_level= [HW,ACPI,ACPI_DEBUG]
|
acpi.debug_level= [HW,ACPI,ACPI_DEBUG]
|
||||||
Format: <int>
|
Format: <int>
|
||||||
|
@ -246,6 +188,41 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
unusable. The "log_buf_len" parameter may be useful
|
unusable. The "log_buf_len" parameter may be useful
|
||||||
if you need to capture more output.
|
if you need to capture more output.
|
||||||
|
|
||||||
|
acpi_display_output= [HW,ACPI]
|
||||||
|
acpi_display_output=vendor
|
||||||
|
acpi_display_output=video
|
||||||
|
See above.
|
||||||
|
|
||||||
|
acpi_irq_balance [HW,ACPI]
|
||||||
|
ACPI will balance active IRQs
|
||||||
|
default in APIC mode
|
||||||
|
|
||||||
|
acpi_irq_nobalance [HW,ACPI]
|
||||||
|
ACPI will not move active IRQs (default)
|
||||||
|
default in PIC mode
|
||||||
|
|
||||||
|
acpi_irq_isa= [HW,ACPI] If irq_balance, mark listed IRQs used by ISA
|
||||||
|
Format: <irq>,<irq>...
|
||||||
|
|
||||||
|
acpi_irq_pci= [HW,ACPI] If irq_balance, clear listed IRQs for
|
||||||
|
use by PCI
|
||||||
|
Format: <irq>,<irq>...
|
||||||
|
|
||||||
|
acpi_no_auto_ssdt [HW,ACPI] Disable automatic loading of SSDT
|
||||||
|
|
||||||
|
acpi_os_name= [HW,ACPI] Tell ACPI BIOS the name of the OS
|
||||||
|
Format: To spoof as Windows 98: ="Microsoft Windows"
|
||||||
|
|
||||||
|
acpi_osi= [HW,ACPI] Modify list of supported OS interface strings
|
||||||
|
acpi_osi="string1" # add string1 -- only one string
|
||||||
|
acpi_osi="!string2" # remove built-in string2
|
||||||
|
acpi_osi= # disable all strings
|
||||||
|
|
||||||
|
acpi_pm_good [X86-32,X86-64]
|
||||||
|
Override the pmtimer bug detection: force the kernel
|
||||||
|
to assume that this machine's pmtimer latches its value
|
||||||
|
and always returns good values.
|
||||||
|
|
||||||
acpi.power_nocheck= [HW,ACPI]
|
acpi.power_nocheck= [HW,ACPI]
|
||||||
Format: 1/0 enable/disable the check of power state.
|
Format: 1/0 enable/disable the check of power state.
|
||||||
On some bogus BIOS the _PSC object/_STA object of
|
On some bogus BIOS the _PSC object/_STA object of
|
||||||
|
@ -254,26 +231,21 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
power state again in power transition.
|
power state again in power transition.
|
||||||
1 : disable the power state check
|
1 : disable the power state check
|
||||||
|
|
||||||
acpi_pm_good [X86-32,X86-64]
|
acpi_enforce_resources= [ACPI]
|
||||||
Override the pmtimer bug detection: force the kernel
|
{ strict | lax | no }
|
||||||
to assume that this machine's pmtimer latches its value
|
Check for resource conflicts between native drivers
|
||||||
and always returns good values.
|
and ACPI OperationRegions (SystemIO and SystemMemory
|
||||||
|
only). IO ports and memory declared in ACPI might be
|
||||||
agp= [AGP]
|
used by the ACPI subsystem in arbitrary AML code and
|
||||||
{ off | try_unsupported }
|
can interfere with legacy drivers.
|
||||||
off: disable AGP support
|
strict (default): access to resources claimed by ACPI
|
||||||
try_unsupported: try to drive unsupported chipsets
|
is denied; legacy drivers trying to access reserved
|
||||||
(may crash computer or cause data corruption)
|
resources will fail to bind to device using them.
|
||||||
|
lax: access to resources claimed by ACPI is allowed;
|
||||||
enable_timer_pin_1 [i386,x86-64]
|
legacy drivers trying to access reserved resources
|
||||||
Enable PIN 1 of APIC timer
|
will bind successfully but a warning message is logged.
|
||||||
Can be useful to work around chipset bugs
|
no: ACPI OperationRegions are not marked as reserved,
|
||||||
(in particular on some ATI chipsets).
|
no further checks are performed.
|
||||||
The kernel tries to set a reasonable default.
|
|
||||||
|
|
||||||
disable_timer_pin_1 [i386,x86-64]
|
|
||||||
Disable PIN 1 of APIC timer
|
|
||||||
Can be useful to work around chipset bugs.
|
|
||||||
|
|
||||||
ad1848= [HW,OSS]
|
ad1848= [HW,OSS]
|
||||||
Format: <io>,<irq>,<dma>,<dma2>,<type>
|
Format: <io>,<irq>,<dma>,<dma2>,<type>
|
||||||
|
@ -288,6 +260,12 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
Format: <io>,<irq>,<dma>,<mss_io>,<mpu_io>,<mpu_irq>
|
Format: <io>,<irq>,<dma>,<mss_io>,<mpu_io>,<mpu_irq>
|
||||||
See also header of sound/oss/aedsp16.c.
|
See also header of sound/oss/aedsp16.c.
|
||||||
|
|
||||||
|
agp= [AGP]
|
||||||
|
{ off | try_unsupported }
|
||||||
|
off: disable AGP support
|
||||||
|
try_unsupported: try to drive unsupported chipsets
|
||||||
|
(may crash computer or cause data corruption)
|
||||||
|
|
||||||
aha152x= [HW,SCSI]
|
aha152x= [HW,SCSI]
|
||||||
See Documentation/scsi/aha152x.txt.
|
See Documentation/scsi/aha152x.txt.
|
||||||
|
|
||||||
|
@ -415,12 +393,6 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
possible to determine what the correct size should be.
|
possible to determine what the correct size should be.
|
||||||
This option provides an override for these situations.
|
This option provides an override for these situations.
|
||||||
|
|
||||||
security= [SECURITY] Choose a security module to enable at boot.
|
|
||||||
If this boot parameter is not specified, only the first
|
|
||||||
security module asking for security registration will be
|
|
||||||
loaded. An invalid security module name will be treated
|
|
||||||
as if no module has been chosen.
|
|
||||||
|
|
||||||
capability.disable=
|
capability.disable=
|
||||||
[SECURITY] Disable capabilities. This would normally
|
[SECURITY] Disable capabilities. This would normally
|
||||||
be used only if an alternative security model is to be
|
be used only if an alternative security model is to be
|
||||||
|
@ -492,24 +464,6 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
Range: 0 - 8192
|
Range: 0 - 8192
|
||||||
Default: 64
|
Default: 64
|
||||||
|
|
||||||
dma_debug=off If the kernel is compiled with DMA_API_DEBUG support
|
|
||||||
this option disables the debugging code at boot.
|
|
||||||
|
|
||||||
dma_debug_entries=<number>
|
|
||||||
This option allows tuning the number of preallocated
|
|
||||||
entries for DMA-API debugging code. One entry is
|
|
||||||
required per DMA-API allocation. Use this if the
|
|
||||||
DMA-API debugging code disables itself because the
|
|
||||||
architectural default is too low.
|
|
||||||
|
|
||||||
hpet= [X86-32,HPET] option to control HPET usage
|
|
||||||
Format: { enable (default) | disable | force |
|
|
||||||
verbose }
|
|
||||||
disable: disable HPET and use PIT instead
|
|
||||||
force: allow force-enabling of undocumented chips (ICH4,
|
|
||||||
VIA, nVidia)
|
|
||||||
verbose: show contents of HPET registers during setup
|
|
||||||
|
|
||||||
com20020= [HW,NET] ARCnet - COM20020 chipset
|
com20020= [HW,NET] ARCnet - COM20020 chipset
|
||||||
Format:
|
Format:
|
||||||
<io>[,<irq>[,<nodeID>[,<backplane>[,<ckp>[,<timeout>]]]]]
|
<io>[,<irq>[,<nodeID>[,<backplane>[,<ckp>[,<timeout>]]]]]
|
||||||
|
@ -553,23 +507,6 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
console=brl,ttyS0
|
console=brl,ttyS0
|
||||||
For now, only VisioBraille is supported.
|
For now, only VisioBraille is supported.
|
||||||
|
|
||||||
earlycon= [KNL] Output early console device and options.
|
|
||||||
uart[8250],io,<addr>[,options]
|
|
||||||
uart[8250],mmio,<addr>[,options]
|
|
||||||
Start an early, polled-mode console on the 8250/16550
|
|
||||||
UART at the specified I/O port or MMIO address.
|
|
||||||
The options are the same as for ttyS, above.
|
|
||||||
|
|
||||||
no_console_suspend
|
|
||||||
[HW] Never suspend the console
|
|
||||||
Disable suspending of consoles during suspend and
|
|
||||||
hibernate operations. Once disabled, debugging
|
|
||||||
messages can reach various consoles while the rest
|
|
||||||
of the system is being put to sleep (i.e., while
|
|
||||||
debugging driver suspend/resume hooks). This may
|
|
||||||
not work reliably with all consoles, but is known
|
|
||||||
to work with serial and VGA consoles.
|
|
||||||
|
|
||||||
coredump_filter=
|
coredump_filter=
|
||||||
[KNL] Change the default value for
|
[KNL] Change the default value for
|
||||||
/proc/<pid>/coredump_filter.
|
/proc/<pid>/coredump_filter.
|
||||||
|
@ -617,36 +554,22 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
|
|
||||||
debug_objects [KNL] Enable object debugging
|
debug_objects [KNL] Enable object debugging
|
||||||
|
|
||||||
|
no_debug_objects
|
||||||
|
[KNL] Disable object debugging
|
||||||
|
|
||||||
debugpat [X86] Enable PAT debugging
|
debugpat [X86] Enable PAT debugging
|
||||||
|
|
||||||
decnet.addr= [HW,NET]
|
decnet.addr= [HW,NET]
|
||||||
Format: <area>[,<node>]
|
Format: <area>[,<node>]
|
||||||
See also Documentation/networking/decnet.txt.
|
See also Documentation/networking/decnet.txt.
|
||||||
|
|
||||||
vt.default_blu= [VT]
|
default_hugepagesz=
|
||||||
Format: <blue0>,<blue1>,<blue2>,...,<blue15>
|
[same as hugepagesz=] The size of the default
|
||||||
Change the default blue palette of the console.
|
HugeTLB page size. This is the size represented by
|
||||||
This is a 16-member array composed of values
|
the legacy /proc/ hugepages APIs, used for SHM, and
|
||||||
ranging from 0-255.
|
default size when mounting hugetlbfs filesystems.
|
||||||
|
Defaults to the architecture's default huge page size
|
||||||
vt.default_grn= [VT]
|
if not specified.
|
||||||
Format: <green0>,<green1>,<green2>,...,<green15>
|
|
||||||
Change the default green palette of the console.
|
|
||||||
This is a 16-member array composed of values
|
|
||||||
ranging from 0-255.
|
|
||||||
|
|
||||||
vt.default_red= [VT]
|
|
||||||
Format: <red0>,<red1>,<red2>,...,<red15>
|
|
||||||
Change the default red palette of the console.
|
|
||||||
This is a 16-member array composed of values
|
|
||||||
ranging from 0-255.
|
|
||||||
|
|
||||||
vt.default_utf8=
|
|
||||||
[VT]
|
|
||||||
Format=<0|1>
|
|
||||||
Set system-wide default UTF-8 mode for all tty's.
|
|
||||||
Default is 1, i.e. UTF-8 mode is enabled for all
|
|
||||||
newly opened terminals.
|
|
||||||
|
|
||||||
dhash_entries= [KNL]
|
dhash_entries= [KNL]
|
||||||
Set number of hash buckets for dentry cache.
|
Set number of hash buckets for dentry cache.
|
||||||
|
@ -659,27 +582,9 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
Documentation/serial/digiepca.txt.
|
Documentation/serial/digiepca.txt.
|
||||||
|
|
||||||
disable_mtrr_cleanup [X86]
|
disable_mtrr_cleanup [X86]
|
||||||
enable_mtrr_cleanup [X86]
|
|
||||||
The kernel tries to adjust MTRR layout from continuous
|
The kernel tries to adjust MTRR layout from continuous
|
||||||
to discrete, to make X server driver able to add WB
|
to discrete, to make X server driver able to add WB
|
||||||
entry later. This parameter enables/disables that.
|
entry later. This parameter disables that.
|
||||||
|
|
||||||
mtrr_chunk_size=nn[KMG] [X86]
|
|
||||||
Used for mtrr cleanup. It is the largest continuous chunk
|
|
||||||
that could hold holes aka. UC entries.
|
|
||||||
|
|
||||||
mtrr_gran_size=nn[KMG] [X86]
|
|
||||||
Used for mtrr cleanup. It is granularity of mtrr block.
|
|
||||||
Default is 1.
|
|
||||||
Large value could prevent small alignment from
|
|
||||||
using up MTRRs.
|
|
||||||
|
|
||||||
mtrr_spare_reg_nr=n [X86]
|
|
||||||
Format: <integer>
|
|
||||||
Range: 0,7 : spare reg number
|
|
||||||
Default : 1
|
|
||||||
Used for mtrr cleanup. It is spare mtrr entries number.
|
|
||||||
Set to 2 or more if your graphical card needs more.
|
|
||||||
|
|
||||||
disable_mtrr_trim [X86, Intel and AMD only]
|
disable_mtrr_trim [X86, Intel and AMD only]
|
||||||
By default the kernel will trim any uncacheable
|
By default the kernel will trim any uncacheable
|
||||||
|
@ -687,12 +592,38 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
MTRR settings. This parameter disables that behavior,
|
MTRR settings. This parameter disables that behavior,
|
||||||
possibly causing your machine to run very slowly.
|
possibly causing your machine to run very slowly.
|
||||||
|
|
||||||
|
disable_timer_pin_1 [i386,x86-64]
|
||||||
|
Disable PIN 1 of APIC timer
|
||||||
|
Can be useful to work around chipset bugs.
|
||||||
|
|
||||||
dmasound= [HW,OSS] Sound subsystem buffers
|
dmasound= [HW,OSS] Sound subsystem buffers
|
||||||
|
|
||||||
|
dma_debug=off If the kernel is compiled with DMA_API_DEBUG support,
|
||||||
|
this option disables the debugging code at boot.
|
||||||
|
|
||||||
|
dma_debug_entries=<number>
|
||||||
|
This option allows tuning the number of preallocated
|
||||||
|
entries for DMA-API debugging code. One entry is
|
||||||
|
required per DMA-API allocation. Use this if the
|
||||||
|
DMA-API debugging code disables itself because the
|
||||||
|
architectural default is too low.
|
||||||
|
|
||||||
dscc4.setup= [NET]
|
dscc4.setup= [NET]
|
||||||
|
|
||||||
dtc3181e= [HW,SCSI]
|
dtc3181e= [HW,SCSI]
|
||||||
|
|
||||||
|
dynamic_printk Enables pr_debug()/dev_dbg() calls if
|
||||||
|
CONFIG_DYNAMIC_PRINTK_DEBUG has been enabled.
|
||||||
|
These can also be switched on/off via
|
||||||
|
<debugfs>/dynamic_printk/modules
|
||||||
|
|
||||||
|
earlycon= [KNL] Output early console device and options.
|
||||||
|
uart[8250],io,<addr>[,options]
|
||||||
|
uart[8250],mmio,<addr>[,options]
|
||||||
|
Start an early, polled-mode console on the 8250/16550
|
||||||
|
UART at the specified I/O port or MMIO address.
|
||||||
|
The options are the same as for ttyS, above.
|
||||||
|
|
||||||
earlyprintk= [X86-32,X86-64,SH,BLACKFIN]
|
earlyprintk= [X86-32,X86-64,SH,BLACKFIN]
|
||||||
earlyprintk=vga
|
earlyprintk=vga
|
||||||
earlyprintk=serial[,ttySn[,baudrate]]
|
earlyprintk=serial[,ttySn[,baudrate]]
|
||||||
|
@ -734,6 +665,17 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
pass this option to capture kernel.
|
pass this option to capture kernel.
|
||||||
See Documentation/kdump/kdump.txt for details.
|
See Documentation/kdump/kdump.txt for details.
|
||||||
|
|
||||||
|
enable_mtrr_cleanup [X86]
|
||||||
|
The kernel tries to adjust MTRR layout from continuous
|
||||||
|
to discrete, to make X server driver able to add WB
|
||||||
|
entry later. This parameter enables that.
|
||||||
|
|
||||||
|
enable_timer_pin_1 [i386,x86-64]
|
||||||
|
Enable PIN 1 of APIC timer
|
||||||
|
Can be useful to work around chipset bugs
|
||||||
|
(in particular on some ATI chipsets).
|
||||||
|
The kernel tries to set a reasonable default.
|
||||||
|
|
||||||
enforcing [SELINUX] Set initial enforcing status.
|
enforcing [SELINUX] Set initial enforcing status.
|
||||||
Format: {"0" | "1"}
|
Format: {"0" | "1"}
|
||||||
See security/selinux/Kconfig help text.
|
See security/selinux/Kconfig help text.
|
||||||
|
@ -821,6 +763,16 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
hisax= [HW,ISDN]
|
hisax= [HW,ISDN]
|
||||||
See Documentation/isdn/README.HiSax.
|
See Documentation/isdn/README.HiSax.
|
||||||
|
|
||||||
|
hlt [BUGS=ARM,SH]
|
||||||
|
|
||||||
|
hpet= [X86-32,HPET] option to control HPET usage
|
||||||
|
Format: { enable (default) | disable | force |
|
||||||
|
verbose }
|
||||||
|
disable: disable HPET and use PIT instead
|
||||||
|
force: allow force-enabling of undocumented chips (ICH4,
|
||||||
|
VIA, nVidia)
|
||||||
|
verbose: show contents of HPET registers during setup
|
||||||
|
|
||||||
hugepages= [HW,X86-32,IA-64] HugeTLB pages to allocate at boot.
|
hugepages= [HW,X86-32,IA-64] HugeTLB pages to allocate at boot.
|
||||||
hugepagesz= [HW,IA-64,PPC,X86-64] The size of the HugeTLB pages.
|
hugepagesz= [HW,IA-64,PPC,X86-64] The size of the HugeTLB pages.
|
||||||
On x86-64 and powerpc, this option can be specified
|
On x86-64 and powerpc, this option can be specified
|
||||||
|
@ -830,15 +782,6 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
(when the CPU supports the "pdpe1gb" cpuinfo flag)
|
(when the CPU supports the "pdpe1gb" cpuinfo flag)
|
||||||
Note that 1GB pages can only be allocated at boot time
|
Note that 1GB pages can only be allocated at boot time
|
||||||
using hugepages= and not freed afterwards.
|
using hugepages= and not freed afterwards.
|
||||||
default_hugepagesz=
|
|
||||||
[same as hugepagesz=] The size of the default
|
|
||||||
HugeTLB page size. This is the size represented by
|
|
||||||
the legacy /proc/ hugepages APIs, used for SHM, and
|
|
||||||
default size when mounting hugetlbfs filesystems.
|
|
||||||
Defaults to the architecture's default huge page size
|
|
||||||
if not specified.
|
|
||||||
|
|
||||||
hlt [BUGS=ARM,SH]
|
|
||||||
|
|
||||||
hvc_iucv= [S390] Number of z/VM IUCV hypervisor console (HVC)
|
hvc_iucv= [S390] Number of z/VM IUCV hypervisor console (HVC)
|
||||||
terminal devices. Valid values: 0..8
|
terminal devices. Valid values: 0..8
|
||||||
|
@ -899,6 +842,9 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
idebus= [HW] (E)IDE subsystem - VLB/PCI bus speed
|
idebus= [HW] (E)IDE subsystem - VLB/PCI bus speed
|
||||||
See Documentation/ide/ide.txt.
|
See Documentation/ide/ide.txt.
|
||||||
|
|
||||||
|
ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem
|
||||||
|
Claim all unknown PCI IDE storage controllers.
|
||||||
|
|
||||||
idle= [X86]
|
idle= [X86]
|
||||||
Format: idle=poll, idle=mwait, idle=halt, idle=nomwait
|
Format: idle=poll, idle=mwait, idle=halt, idle=nomwait
|
||||||
Poll forces a polling idle loop that can slightly
|
Poll forces a polling idle loop that can slightly
|
||||||
|
@ -914,9 +860,6 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
In such case C2/C3 won't be used again.
|
In such case C2/C3 won't be used again.
|
||||||
idle=nomwait: Disable mwait for CPU C-states
|
idle=nomwait: Disable mwait for CPU C-states
|
||||||
|
|
||||||
ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem
|
|
||||||
Claim all unknown PCI IDE storage controllers.
|
|
||||||
|
|
||||||
ignore_loglevel [KNL]
|
ignore_loglevel [KNL]
|
||||||
Ignore loglevel setting - this will print /all/
|
Ignore loglevel setting - this will print /all/
|
||||||
kernel messages to the console. Useful for debugging.
|
kernel messages to the console. Useful for debugging.
|
||||||
|
@ -950,25 +893,6 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
inport.irq= [HW] Inport (ATI XL and Microsoft) busmouse driver
|
inport.irq= [HW] Inport (ATI XL and Microsoft) busmouse driver
|
||||||
Format: <irq>
|
Format: <irq>
|
||||||
|
|
||||||
inttest= [IA64]
|
|
||||||
|
|
||||||
iomem= Disable strict checking of access to MMIO memory
|
|
||||||
strict regions from userspace.
|
|
||||||
relaxed
|
|
||||||
|
|
||||||
iommu= [x86]
|
|
||||||
off
|
|
||||||
force
|
|
||||||
noforce
|
|
||||||
biomerge
|
|
||||||
panic
|
|
||||||
nopanic
|
|
||||||
merge
|
|
||||||
nomerge
|
|
||||||
forcesac
|
|
||||||
soft
|
|
||||||
|
|
||||||
|
|
||||||
intel_iommu= [DMAR] Intel IOMMU driver (DMAR) option
|
intel_iommu= [DMAR] Intel IOMMU driver (DMAR) option
|
||||||
on
|
on
|
||||||
Enable intel iommu driver.
|
Enable intel iommu driver.
|
||||||
|
@ -992,6 +916,28 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
result in a hardware IOTLB flush operation as opposed
|
result in a hardware IOTLB flush operation as opposed
|
||||||
to batching them for performance.
|
to batching them for performance.
|
||||||
|
|
||||||
|
inttest= [IA64]
|
||||||
|
|
||||||
|
iomem= Disable strict checking of access to MMIO memory
|
||||||
|
strict regions from userspace.
|
||||||
|
relaxed
|
||||||
|
|
||||||
|
iommu= [x86]
|
||||||
|
off
|
||||||
|
force
|
||||||
|
noforce
|
||||||
|
biomerge
|
||||||
|
panic
|
||||||
|
nopanic
|
||||||
|
merge
|
||||||
|
nomerge
|
||||||
|
forcesac
|
||||||
|
soft
|
||||||
|
|
||||||
|
io7= [HW] IO7 for Marvel based alpha systems
|
||||||
|
See comment before marvel_specify_io7 in
|
||||||
|
arch/alpha/kernel/core_marvel.c.
|
||||||
|
|
||||||
io_delay= [X86-32,X86-64] I/O delay method
|
io_delay= [X86-32,X86-64] I/O delay method
|
||||||
0x80
|
0x80
|
||||||
Standard port 0x80 based delay
|
Standard port 0x80 based delay
|
||||||
|
@ -1002,10 +948,6 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
none
|
none
|
||||||
No delay
|
No delay
|
||||||
|
|
||||||
io7= [HW] IO7 for Marvel based alpha systems
|
|
||||||
See comment before marvel_specify_io7 in
|
|
||||||
arch/alpha/kernel/core_marvel.c.
|
|
||||||
|
|
||||||
ip= [IP_PNP]
|
ip= [IP_PNP]
|
||||||
See Documentation/filesystems/nfsroot.txt.
|
See Documentation/filesystems/nfsroot.txt.
|
||||||
|
|
||||||
|
@ -1016,12 +958,6 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
ips= [HW,SCSI] Adaptec / IBM ServeRAID controller
|
ips= [HW,SCSI] Adaptec / IBM ServeRAID controller
|
||||||
See header of drivers/scsi/ips.c.
|
See header of drivers/scsi/ips.c.
|
||||||
|
|
||||||
ports= [IP_VS_FTP] IPVS ftp helper module
|
|
||||||
Default is 21.
|
|
||||||
Up to 8 (IP_VS_APP_MAX_PORTS) ports
|
|
||||||
may be specified.
|
|
||||||
Format: <port>,<port>....
|
|
||||||
|
|
||||||
irqfixup [HW]
|
irqfixup [HW]
|
||||||
When an interrupt is not handled search all handlers
|
When an interrupt is not handled search all handlers
|
||||||
for it. Intended to get systems with badly broken
|
for it. Intended to get systems with badly broken
|
||||||
|
@ -1062,6 +998,8 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
js= [HW,JOY] Analog joystick
|
js= [HW,JOY] Analog joystick
|
||||||
See Documentation/input/joystick.txt.
|
See Documentation/input/joystick.txt.
|
||||||
|
|
||||||
|
keepinitrd [HW,ARM]
|
||||||
|
|
||||||
kernelcore=nn[KMG] [KNL,X86-32,IA-64,PPC,X86-64] This parameter
|
kernelcore=nn[KMG] [KNL,X86-32,IA-64,PPC,X86-64] This parameter
|
||||||
specifies the amount of memory usable by the kernel
|
specifies the amount of memory usable by the kernel
|
||||||
for non-movable allocations. The requested amount is
|
for non-movable allocations. The requested amount is
|
||||||
|
@ -1078,20 +1016,14 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
use the HighMem zone if it exists, and the Normal
|
use the HighMem zone if it exists, and the Normal
|
||||||
zone if it does not.
|
zone if it does not.
|
||||||
|
|
||||||
movablecore=nn[KMG] [KNL,X86-32,IA-64,PPC,X86-64] This parameter
|
kmemtrace.enable= [KNL,KMEMTRACE] Format: { yes | no }
|
||||||
is similar to kernelcore except it specifies the
|
Controls whether kmemtrace is enabled
|
||||||
amount of memory used for migratable allocations.
|
at boot-time.
|
||||||
If both kernelcore and movablecore is specified,
|
|
||||||
then kernelcore will be at *least* the specified
|
|
||||||
value but may be more. If movablecore on its own
|
|
||||||
is specified, the administrator must be careful
|
|
||||||
that the amount of memory usable for all allocations
|
|
||||||
is not too small.
|
|
||||||
|
|
||||||
keepinitrd [HW,ARM]
|
kmemtrace.subbufs=n [KNL,KMEMTRACE] Overrides the number of
|
||||||
|
subbufs kmemtrace's relay channel has. Set this
|
||||||
kstack=N [X86-32,X86-64] Print N words from the kernel stack
|
higher than default (KMEMTRACE_N_SUBBUFS in code) if
|
||||||
in oops dumps.
|
you experience buffer overruns.
|
||||||
|
|
||||||
kgdboc= [HW] kgdb over consoles.
|
kgdboc= [HW] kgdb over consoles.
|
||||||
Requires a tty driver that supports console polling.
|
Requires a tty driver that supports console polling.
|
||||||
|
@ -1102,6 +1034,9 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
Configure the RouterBoard 532 series on-chip
|
Configure the RouterBoard 532 series on-chip
|
||||||
Ethernet adapter MAC address.
|
Ethernet adapter MAC address.
|
||||||
|
|
||||||
|
kstack=N [X86-32,X86-64] Print N words from the kernel stack
|
||||||
|
in oops dumps.
|
||||||
|
|
||||||
l2cr= [PPC]
|
l2cr= [PPC]
|
||||||
|
|
||||||
l3cr= [PPC]
|
l3cr= [PPC]
|
||||||
|
@ -1247,9 +1182,8 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
(machvec) in a generic kernel.
|
(machvec) in a generic kernel.
|
||||||
Example: machvec=hpzx1_swiotlb
|
Example: machvec=hpzx1_swiotlb
|
||||||
|
|
||||||
max_loop= [LOOP] Maximum number of loopback devices that can
|
max_addr=nn[KMG] [KNL,BOOT,ia64] All physical memory greater
|
||||||
be mounted
|
than or equal to this physical address is ignored.
|
||||||
Format: <1-256>
|
|
||||||
|
|
||||||
maxcpus= [SMP] Maximum number of processors that an SMP kernel
|
maxcpus= [SMP] Maximum number of processors that an SMP kernel
|
||||||
should make use of. maxcpus=n : n >= 0 limits the
|
should make use of. maxcpus=n : n >= 0 limits the
|
||||||
|
@ -1257,8 +1191,9 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
it is equivalent to "nosmp", which also disables
|
it is equivalent to "nosmp", which also disables
|
||||||
the IO APIC.
|
the IO APIC.
|
||||||
|
|
||||||
max_addr=nn[KMG] [KNL,BOOT,ia64] All physical memory greater than
|
max_loop= [LOOP] Maximum number of loopback devices that can
|
||||||
or equal to this physical address is ignored.
|
be mounted
|
||||||
|
Format: <1-256>
|
||||||
|
|
||||||
max_luns= [SCSI] Maximum number of LUNs to probe.
|
max_luns= [SCSI] Maximum number of LUNs to probe.
|
||||||
Should be between 1 and 2^32-1.
|
Should be between 1 and 2^32-1.
|
||||||
|
@ -1385,6 +1320,16 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
mousedev.yres= [MOUSE] Vertical screen resolution, used for devices
|
mousedev.yres= [MOUSE] Vertical screen resolution, used for devices
|
||||||
reporting absolute coordinates, such as tablets
|
reporting absolute coordinates, such as tablets
|
||||||
|
|
||||||
|
movablecore=nn[KMG] [KNL,X86-32,IA-64,PPC,X86-64] This parameter
|
||||||
|
is similar to kernelcore except it specifies the
|
||||||
|
amount of memory used for migratable allocations.
|
||||||
|
If both kernelcore and movablecore are specified,
|
||||||
|
then kernelcore will be at *least* the specified
|
||||||
|
value but may be more. If movablecore on its own
|
||||||
|
is specified, the administrator must be careful
|
||||||
|
that the amount of memory usable for all allocations
|
||||||
|
is not too small.
|
||||||
|
|
||||||
mpu401= [HW,OSS]
|
mpu401= [HW,OSS]
|
||||||
Format: <io>,<irq>
|
Format: <io>,<irq>
|
||||||
|
|
||||||
|
@ -1406,6 +1351,23 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
[HW] Make the MicroTouch USB driver use raw coordinates
|
[HW] Make the MicroTouch USB driver use raw coordinates
|
||||||
('y', default) or cooked coordinates ('n')
|
('y', default) or cooked coordinates ('n')
|
||||||
|
|
||||||
|
mtrr_chunk_size=nn[KMG] [X86]
|
||||||
|
Used for mtrr cleanup. It is the largest continuous chunk
|
||||||
|
that could hold holes aka. UC entries.
|
||||||
|
|
||||||
|
mtrr_gran_size=nn[KMG] [X86]
|
||||||
|
Used for mtrr cleanup. It is granularity of mtrr block.
|
||||||
|
Default is 1.
|
||||||
|
Large value could prevent small alignment from
|
||||||
|
using up MTRRs.
|
||||||
|
|
||||||
|
mtrr_spare_reg_nr=n [X86]
|
||||||
|
Format: <integer>
|
||||||
|
Range: 0,7 : spare reg number
|
||||||
|
Default : 1
|
||||||
|
Used for mtrr cleanup. It is spare mtrr entries number.
|
||||||
|
Set to 2 or more if your graphical card needs more.
|
||||||
|
|
||||||
n2= [NET] SDL Inc. RISCom/N2 synchronous serial card
|
n2= [NET] SDL Inc. RISCom/N2 synchronous serial card
|
||||||
|
|
||||||
NCR_D700= [HW,SCSI]
|
NCR_D700= [HW,SCSI]
|
||||||
|
@ -1466,11 +1428,13 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
0 - turn nmi_watchdog off
|
0 - turn nmi_watchdog off
|
||||||
1 - use the IO-APIC timer for the NMI watchdog
|
1 - use the IO-APIC timer for the NMI watchdog
|
||||||
2 - use the local APIC for the NMI watchdog using
|
2 - use the local APIC for the NMI watchdog using
|
||||||
a performance counter. Note: This will use one performance
|
a performance counter. Note: This will use one
|
||||||
counter and the local APIC's performance vector.
|
performance counter and the local APIC's performance
|
||||||
When panic is specified panic when an NMI watchdog timeout occurs.
|
vector.
|
||||||
This is useful when you use a panic=... timeout and need the box
|
When panic is specified, panic when an NMI watchdog
|
||||||
quickly up again.
|
timeout occurs.
|
||||||
|
This is useful when you use a panic=... timeout and
|
||||||
|
need the box quickly up again.
|
||||||
Instead of 1 and 2 it is possible to use the following
|
Instead of 1 and 2 it is possible to use the following
|
||||||
symbolic names: lapic and ioapic
|
symbolic names: lapic and ioapic
|
||||||
Example: nmi_watchdog=2 or nmi_watchdog=panic,lapic
|
Example: nmi_watchdog=2 or nmi_watchdog=panic,lapic
|
||||||
|
@ -1479,6 +1443,16 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
emulation library even if a 387 maths coprocessor
|
emulation library even if a 387 maths coprocessor
|
||||||
is present.
|
is present.
|
||||||
|
|
||||||
|
no_console_suspend
|
||||||
|
[HW] Never suspend the console
|
||||||
|
Disable suspending of consoles during suspend and
|
||||||
|
hibernate operations. Once disabled, debugging
|
||||||
|
messages can reach various consoles while the rest
|
||||||
|
of the system is being put to sleep (ie, while
|
||||||
|
debugging driver suspend/resume hooks). This may
|
||||||
|
not work reliably with all consoles, but is known
|
||||||
|
to work with serial and VGA consoles.
|
||||||
|
|
||||||
noaliencache [MM, NUMA, SLAB] Disables the allocation of alien
|
noaliencache [MM, NUMA, SLAB] Disables the allocation of alien
|
||||||
caches in the slab allocator. Saves per-node memory,
|
caches in the slab allocator. Saves per-node memory,
|
||||||
but will impact performance.
|
but will impact performance.
|
||||||
|
@ -1493,6 +1467,8 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
|
|
||||||
nocache [ARM]
|
nocache [ARM]
|
||||||
|
|
||||||
|
noclflush [BUGS=X86] Don't use the CLFLUSH instruction
|
||||||
|
|
||||||
nodelayacct [KNL] Disable per-task delay accounting
|
nodelayacct [KNL] Disable per-task delay accounting
|
||||||
|
|
||||||
nodisconnect [HW,SCSI,M68K] Disables SCSI disconnects.
|
nodisconnect [HW,SCSI,M68K] Disables SCSI disconnects.
|
||||||
|
@ -1521,9 +1497,9 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
register save and restore. The kernel will only save
|
register save and restore. The kernel will only save
|
||||||
legacy floating-point registers on task switch.
|
legacy floating-point registers on task switch.
|
||||||
|
|
||||||
noclflush [BUGS=X86] Don't use the CLFLUSH instruction
|
nohlt [BUGS=ARM,SH] Tells the kernel that the sleep(SH) or
|
||||||
|
wfi(ARM) instruction doesn't work correctly and not to
|
||||||
nohlt [BUGS=ARM,SH]
|
use it. This is also useful when using a JTAG debugger.
|
||||||
|
|
||||||
no-hlt [BUGS=X86-32] Tells the kernel that the hlt
|
no-hlt [BUGS=X86-32] Tells the kernel that the hlt
|
||||||
instruction doesn't work correctly and not to
|
instruction doesn't work correctly and not to
|
||||||
|
@ -1544,6 +1520,8 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
Valid arguments: on, off
|
Valid arguments: on, off
|
||||||
Default: on
|
Default: on
|
||||||
|
|
||||||
|
noiotrap [SH] Disables trapped I/O port accesses.
|
||||||
|
|
||||||
noirqdebug [X86-32] Disables the code which attempts to detect and
|
noirqdebug [X86-32] Disables the code which attempts to detect and
|
||||||
disable unhandled interrupt sources.
|
disable unhandled interrupt sources.
|
||||||
|
|
||||||
|
@ -1563,12 +1541,6 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
|
|
||||||
nolapic_timer [X86-32,APIC] Do not use the local APIC timer.
|
nolapic_timer [X86-32,APIC] Do not use the local APIC timer.
|
||||||
|
|
||||||
nox2apic [X86-64,APIC] Do not enable x2APIC mode.
|
|
||||||
|
|
||||||
x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of
|
|
||||||
default x2apic cluster mode on platforms
|
|
||||||
supporting x2apic.
|
|
||||||
|
|
||||||
noltlbs [PPC] Do not use large page/tlb entries for kernel
|
noltlbs [PPC] Do not use large page/tlb entries for kernel
|
||||||
lowmem mapping on PPC40x.
|
lowmem mapping on PPC40x.
|
||||||
|
|
||||||
|
@ -1579,6 +1551,9 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
nomfgpt [X86-32] Disable Multi-Function General Purpose
|
nomfgpt [X86-32] Disable Multi-Function General Purpose
|
||||||
Timer usage (for AMD Geode machines).
|
Timer usage (for AMD Geode machines).
|
||||||
|
|
||||||
|
norandmaps Don't use address space randomization. Equivalent to
|
||||||
|
echo 0 > /proc/sys/kernel/randomize_va_space
|
||||||
|
|
||||||
noreplace-paravirt [X86-32,PV_OPS] Don't patch paravirt_ops
|
noreplace-paravirt [X86-32,PV_OPS] Don't patch paravirt_ops
|
||||||
|
|
||||||
noreplace-smp [X86-32,SMP] Don't replace SMP instructions
|
noreplace-smp [X86-32,SMP] Don't replace SMP instructions
|
||||||
|
@ -1603,7 +1578,7 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
nosoftlockup [KNL] Disable the soft-lockup detector.
|
nosoftlockup [KNL] Disable the soft-lockup detector.
|
||||||
|
|
||||||
noswapaccount [KNL] Disable accounting of swap in memory resource
|
noswapaccount [KNL] Disable accounting of swap in memory resource
|
||||||
controller. (See Documentation/controllers/memory.txt)
|
controller. (See Documentation/cgroups/memory.txt)
|
||||||
|
|
||||||
nosync [HW,M68K] Disables sync negotiation for all devices.
|
nosync [HW,M68K] Disables sync negotiation for all devices.
|
||||||
|
|
||||||
|
@ -1617,13 +1592,13 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
purges which is reported from either PAL_VM_SUMMARY or
|
purges which is reported from either PAL_VM_SUMMARY or
|
||||||
SAL PALO.
|
SAL PALO.
|
||||||
|
|
||||||
|
nr_uarts= [SERIAL] maximum number of UARTs to be registered.
|
||||||
|
|
||||||
numa_zonelist_order= [KNL, BOOT] Select zonelist order for NUMA.
|
numa_zonelist_order= [KNL, BOOT] Select zonelist order for NUMA.
|
||||||
one of ['zone', 'node', 'default'] can be specified
|
one of ['zone', 'node', 'default'] can be specified
|
||||||
This can be set from sysctl after boot.
|
This can be set from sysctl after boot.
|
||||||
See Documentation/sysctl/vm.txt for details.
|
See Documentation/sysctl/vm.txt for details.
|
||||||
|
|
||||||
nr_uarts= [SERIAL] maximum number of UARTs to be registered.
|
|
||||||
|
|
||||||
ohci1394_dma=early [HW] enable debugging via the ohci1394 driver.
|
ohci1394_dma=early [HW] enable debugging via the ohci1394 driver.
|
||||||
See Documentation/debugging-via-ohci1394.txt for more
|
See Documentation/debugging-via-ohci1394.txt for more
|
||||||
info.
|
info.
|
||||||
|
@ -1872,6 +1847,14 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
printk.time= Show timing data prefixed to each printk message line
|
printk.time= Show timing data prefixed to each printk message line
|
||||||
Format: <bool> (1/Y/y=enable, 0/N/n=disable)
|
Format: <bool> (1/Y/y=enable, 0/N/n=disable)
|
||||||
|
|
||||||
|
processor.max_cstate= [HW,ACPI]
|
||||||
|
Limit processor to maximum C-state
|
||||||
|
max_cstate=9 overrides any DMI blacklist limit.
|
||||||
|
|
||||||
|
processor.nocst [HW,ACPI]
|
||||||
|
Ignore the _CST method to determine C-states,
|
||||||
|
instead using the legacy FADT method
|
||||||
|
|
||||||
profile= [KNL] Enable kernel profiling via /proc/profile
|
profile= [KNL] Enable kernel profiling via /proc/profile
|
||||||
Format: [schedule,]<number>
|
Format: [schedule,]<number>
|
||||||
Param: "schedule" - profile schedule points.
|
Param: "schedule" - profile schedule points.
|
||||||
|
@ -1881,14 +1864,6 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
Requires CONFIG_SCHEDSTATS
|
Requires CONFIG_SCHEDSTATS
|
||||||
Param: "kvm" - profile VM exits.
|
Param: "kvm" - profile VM exits.
|
||||||
|
|
||||||
processor.max_cstate= [HW,ACPI]
|
|
||||||
Limit processor to maximum C-state
|
|
||||||
max_cstate=9 overrides any DMI blacklist limit.
|
|
||||||
|
|
||||||
processor.nocst [HW,ACPI]
|
|
||||||
Ignore the _CST method to determine C-states,
|
|
||||||
instead using the legacy FADT method
|
|
||||||
|
|
||||||
prompt_ramdisk= [RAM] List of RAM disks to prompt for floppy disk
|
prompt_ramdisk= [RAM] List of RAM disks to prompt for floppy disk
|
||||||
before loading.
|
before loading.
|
||||||
See Documentation/blockdev/ramdisk.txt.
|
See Documentation/blockdev/ramdisk.txt.
|
||||||
|
@ -1953,7 +1928,7 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
|
|
||||||
relax_domain_level=
|
relax_domain_level=
|
||||||
[KNL, SMP] Set scheduler's default relax_domain_level.
|
[KNL, SMP] Set scheduler's default relax_domain_level.
|
||||||
See Documentation/cpusets.txt.
|
See Documentation/cgroups/cpusets.txt.
|
||||||
|
|
||||||
reserve= [KNL,BUGS] Force the kernel to ignore some iomem area
|
reserve= [KNL,BUGS] Force the kernel to ignore some iomem area
|
||||||
|
|
||||||
|
@ -2042,7 +2017,13 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
allowing boot to proceed. none ignores them, expecting
|
allowing boot to proceed. none ignores them, expecting
|
||||||
user space to do the scan.
|
user space to do the scan.
|
||||||
|
|
||||||
selinux [SELINUX] Disable or enable SELinux at boot time.
|
security= [SECURITY] Choose a security module to enable at boot.
|
||||||
|
If this boot parameter is not specified, only the first
|
||||||
|
security module asking for security registration will be
|
||||||
|
loaded. An invalid security module name will be treated
|
||||||
|
as if no module has been chosen.
|
||||||
|
|
||||||
|
selinux= [SELINUX] Disable or enable SELinux at boot time.
|
||||||
Format: { "0" | "1" }
|
Format: { "0" | "1" }
|
||||||
See security/selinux/Kconfig help text.
|
See security/selinux/Kconfig help text.
|
||||||
0 -- disable.
|
0 -- disable.
|
||||||
|
@ -2362,6 +2343,8 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
|
|
||||||
tp720= [HW,PS2]
|
tp720= [HW,PS2]
|
||||||
|
|
||||||
|
trace_buf_size=nn[KMG] [ftrace] will set tracing buffer size.
|
||||||
|
|
||||||
trix= [HW,OSS] MediaTrix AudioTrix Pro
|
trix= [HW,OSS] MediaTrix AudioTrix Pro
|
||||||
Format:
|
Format:
|
||||||
<io>,<irq>,<dma>,<dma2>,<sb_io>,<sb_irq>,<sb_dma>,<mpu_io>,<mpu_irq>
|
<io>,<irq>,<dma>,<dma2>,<sb_io>,<sb_irq>,<sb_dma>,<mpu_io>,<mpu_irq>
|
||||||
|
@ -2464,9 +2447,6 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
medium is write-protected).
|
medium is write-protected).
|
||||||
Example: quirks=0419:aaf5:rl,0421:0433:rc
|
Example: quirks=0419:aaf5:rl,0421:0433:rc
|
||||||
|
|
||||||
add_efi_memmap [EFI; x86-32,X86-64] Include EFI memory map in
|
|
||||||
kernel's map of available physical RAM.
|
|
||||||
|
|
||||||
vdso= [X86-32,SH,x86-64]
|
vdso= [X86-32,SH,x86-64]
|
||||||
vdso=2: enable compat VDSO (default with COMPAT_VDSO)
|
vdso=2: enable compat VDSO (default with COMPAT_VDSO)
|
||||||
vdso=1: enable VDSO (default)
|
vdso=1: enable VDSO (default)
|
||||||
|
@ -2505,6 +2485,31 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
vmpoff= [KNL,S390] Perform z/VM CP command after power off.
|
vmpoff= [KNL,S390] Perform z/VM CP command after power off.
|
||||||
Format: <command>
|
Format: <command>
|
||||||
|
|
||||||
|
vt.default_blu= [VT]
|
||||||
|
Format: <blue0>,<blue1>,<blue2>,...,<blue15>
|
||||||
|
Change the default blue palette of the console.
|
||||||
|
This is a 16-member array composed of values
|
||||||
|
ranging from 0-255.
|
||||||
|
|
||||||
|
vt.default_grn= [VT]
|
||||||
|
Format: <green0>,<green1>,<green2>,...,<green15>
|
||||||
|
Change the default green palette of the console.
|
||||||
|
This is a 16-member array composed of values
|
||||||
|
ranging from 0-255.
|
||||||
|
|
||||||
|
vt.default_red= [VT]
|
||||||
|
Format: <red0>,<red1>,<red2>,...,<red15>
|
||||||
|
Change the default red palette of the console.
|
||||||
|
This is a 16-member array composed of values
|
||||||
|
ranging from 0-255.
|
||||||
|
|
||||||
|
vt.default_utf8=
|
||||||
|
[VT]
|
||||||
|
Format=<0|1>
|
||||||
|
Set system-wide default UTF-8 mode for all tty's.
|
||||||
|
Default is 1, i.e. UTF-8 mode is enabled for all
|
||||||
|
newly opened terminals.
|
||||||
|
|
||||||
waveartist= [HW,OSS]
|
waveartist= [HW,OSS]
|
||||||
Format: <io>,<irq>,<dma>,<dma2>
|
Format: <io>,<irq>,<dma>,<dma2>
|
||||||
|
|
||||||
|
@ -2517,6 +2522,10 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
wdt= [WDT] Watchdog
|
wdt= [WDT] Watchdog
|
||||||
See Documentation/watchdog/wdt.txt.
|
See Documentation/watchdog/wdt.txt.
|
||||||
|
|
||||||
|
x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of
|
||||||
|
default x2apic cluster mode on platforms
|
||||||
|
supporting x2apic.
|
||||||
|
|
||||||
xd= [HW,XT] Original XT pre-IDE (RLL encoded) disks.
|
xd= [HW,XT] Original XT pre-IDE (RLL encoded) disks.
|
||||||
xd_geo= See header of drivers/block/xd.c.
|
xd_geo= See header of drivers/block/xd.c.
|
||||||
|
|
||||||
|
@ -2524,9 +2533,6 @@ and is between 256 and 4096 characters. It is defined in the file
|
||||||
Format:
|
Format:
|
||||||
<irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]]
|
<irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]]
|
||||||
|
|
||||||
norandmaps Don't use address space randomization. Equivalent to
|
|
||||||
echo 0 > /proc/sys/kernel/randomize_va_space
|
|
||||||
|
|
||||||
______________________________________________________________________
|
______________________________________________________________________
|
||||||
|
|
||||||
TODO:
|
TODO:
|
||||||
|
|
|
@ -212,7 +212,9 @@ hit, Kprobes calls kp->pre_handler. After the probed instruction
|
||||||
is single-stepped, Kprobe calls kp->post_handler. If a fault
|
is single-stepped, Kprobe calls kp->post_handler. If a fault
|
||||||
occurs during execution of kp->pre_handler or kp->post_handler,
|
occurs during execution of kp->pre_handler or kp->post_handler,
|
||||||
or during single-stepping of the probed instruction, Kprobes calls
|
or during single-stepping of the probed instruction, Kprobes calls
|
||||||
kp->fault_handler. Any or all handlers can be NULL.
|
kp->fault_handler. Any or all handlers can be NULL. If kp->flags
|
||||||
|
is set KPROBE_FLAG_DISABLED, that kp will be registered but disabled,
|
||||||
|
so its handlers aren't hit until enable_kprobe(kp) is called.
|
||||||
|
|
||||||
NOTE:
|
NOTE:
|
||||||
1. With the introduction of the "symbol_name" field to struct kprobe,
|
1. With the introduction of the "symbol_name" field to struct kprobe,
|
||||||
|
@ -363,6 +365,26 @@ probes) in the specified array, they clear the addr field of those
|
||||||
incorrect probes. However, other probes in the array are
|
incorrect probes. However, other probes in the array are
|
||||||
unregistered correctly.
|
unregistered correctly.
|
||||||
|
|
||||||
|
4.7 disable_*probe
|
||||||
|
|
||||||
|
#include <linux/kprobes.h>
|
||||||
|
int disable_kprobe(struct kprobe *kp);
|
||||||
|
int disable_kretprobe(struct kretprobe *rp);
|
||||||
|
int disable_jprobe(struct jprobe *jp);
|
||||||
|
|
||||||
|
Temporarily disables the specified *probe. You can enable it again by using
|
||||||
|
enable_*probe(). You must specify the probe which has been registered.
|
||||||
|
|
||||||
|
4.8 enable_*probe
|
||||||
|
|
||||||
|
#include <linux/kprobes.h>
|
||||||
|
int enable_kprobe(struct kprobe *kp);
|
||||||
|
int enable_kretprobe(struct kretprobe *rp);
|
||||||
|
int enable_jprobe(struct jprobe *jp);
|
||||||
|
|
||||||
|
Enables *probe which has been disabled by disable_*probe(). You must specify
|
||||||
|
the probe which has been registered.
|
||||||
|
|
||||||
5. Kprobes Features and Limitations
|
5. Kprobes Features and Limitations
|
||||||
|
|
||||||
Kprobes allows multiple probes at the same address. Currently,
|
Kprobes allows multiple probes at the same address. Currently,
|
||||||
|
@ -500,10 +522,14 @@ the probe. If the probed function belongs to a module, the module name
|
||||||
is also specified. Following columns show probe status. If the probe is on
|
is also specified. Following columns show probe status. If the probe is on
|
||||||
a virtual address that is no longer valid (module init sections, module
|
a virtual address that is no longer valid (module init sections, module
|
||||||
virtual addresses that correspond to modules that've been unloaded),
|
virtual addresses that correspond to modules that've been unloaded),
|
||||||
such probes are marked with [GONE].
|
such probes are marked with [GONE]. If the probe is temporarily disabled,
|
||||||
|
such probes are marked with [DISABLED].
|
||||||
|
|
||||||
/debug/kprobes/enabled: Turn kprobes ON/OFF
|
/debug/kprobes/enabled: Turn kprobes ON/OFF forcibly.
|
||||||
|
|
||||||
Provides a knob to globally turn registered kprobes ON or OFF. By default,
|
Provides a knob to globally and forcibly turn registered kprobes ON or OFF.
|
||||||
all kprobes are enabled. By echoing "0" to this file, all registered probes
|
By default, all kprobes are enabled. By echoing "0" to this file, all
|
||||||
will be disarmed, till such time a "1" is echoed to this file.
|
registered probes will be disarmed, till such time a "1" is echoed to this
|
||||||
|
file. Note that this knob just disarms and arms all kprobes and doesn't
|
||||||
|
change each probe's disabling state. This means that disabled kprobes (marked
|
||||||
|
[DISABLED]) will not be enabled if you turn ON all kprobes by this knob.
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
Acer Laptop WMI Extras Driver
|
Acer Laptop WMI Extras Driver
|
||||||
http://code.google.com/p/aceracpi
|
http://code.google.com/p/aceracpi
|
||||||
Version 0.2
|
Version 0.3
|
||||||
18th August 2008
|
4th April 2009
|
||||||
|
|
||||||
Copyright 2007-2008 Carlos Corbacho <carlos@strangeworlds.co.uk>
|
Copyright 2007-2009 Carlos Corbacho <carlos@strangeworlds.co.uk>
|
||||||
|
|
||||||
acer-wmi is a driver to allow you to control various parts of your Acer laptop
|
acer-wmi is a driver to allow you to control various parts of your Acer laptop
|
||||||
hardware under Linux which are exposed via ACPI-WMI.
|
hardware under Linux which are exposed via ACPI-WMI.
|
||||||
|
@ -36,6 +36,10 @@ not possible in kernel space from a 64 bit OS.
|
||||||
Supported Hardware
|
Supported Hardware
|
||||||
******************
|
******************
|
||||||
|
|
||||||
|
NOTE: The Acer Aspire One is not supported hardware. It cannot work with
|
||||||
|
acer-wmi until Acer fix their ACPI-WMI implementation on them, so has been
|
||||||
|
blacklisted until that happens.
|
||||||
|
|
||||||
Please see the website for the current list of known working hardware:
|
Please see the website for the current list of known working hardware:
|
||||||
|
|
||||||
http://code.google.com/p/aceracpi/wiki/SupportedHardware
|
http://code.google.com/p/aceracpi/wiki/SupportedHardware
|
||||||
|
|
|
@ -20,7 +20,8 @@ moved to the drivers/misc tree and renamed to thinkpad-acpi for kernel
|
||||||
kernel 2.6.29 and release 0.22.
|
kernel 2.6.29 and release 0.22.
|
||||||
|
|
||||||
The driver is named "thinkpad-acpi". In some places, like module
|
The driver is named "thinkpad-acpi". In some places, like module
|
||||||
names, "thinkpad_acpi" is used because of userspace issues.
|
names and log messages, "thinkpad_acpi" is used because of userspace
|
||||||
|
issues.
|
||||||
|
|
||||||
"tpacpi" is used as a shorthand where "thinkpad-acpi" would be too
|
"tpacpi" is used as a shorthand where "thinkpad-acpi" would be too
|
||||||
long due to length limitations on some Linux kernel versions.
|
long due to length limitations on some Linux kernel versions.
|
||||||
|
@ -37,7 +38,7 @@ detailed description):
|
||||||
- ThinkLight on and off
|
- ThinkLight on and off
|
||||||
- limited docking and undocking
|
- limited docking and undocking
|
||||||
- UltraBay eject
|
- UltraBay eject
|
||||||
- CMOS control
|
- CMOS/UCMS control
|
||||||
- LED control
|
- LED control
|
||||||
- ACPI sounds
|
- ACPI sounds
|
||||||
- temperature sensors
|
- temperature sensors
|
||||||
|
@ -46,6 +47,7 @@ detailed description):
|
||||||
- Volume control
|
- Volume control
|
||||||
- Fan control and monitoring: fan speed, fan enable/disable
|
- Fan control and monitoring: fan speed, fan enable/disable
|
||||||
- WAN enable and disable
|
- WAN enable and disable
|
||||||
|
- UWB enable and disable
|
||||||
|
|
||||||
A compatibility table by model and feature is maintained on the web
|
A compatibility table by model and feature is maintained on the web
|
||||||
site, http://ibm-acpi.sf.net/. I appreciate any success or failure
|
site, http://ibm-acpi.sf.net/. I appreciate any success or failure
|
||||||
|
@ -53,7 +55,7 @@ reports, especially if they add to or correct the compatibility table.
|
||||||
Please include the following information in your report:
|
Please include the following information in your report:
|
||||||
|
|
||||||
- ThinkPad model name
|
- ThinkPad model name
|
||||||
- a copy of your DSDT, from /proc/acpi/dsdt
|
- a copy of your ACPI tables, using the "acpidump" utility
|
||||||
- a copy of the output of dmidecode, with serial numbers
|
- a copy of the output of dmidecode, with serial numbers
|
||||||
and UUIDs masked off
|
and UUIDs masked off
|
||||||
- which driver features work and which don't
|
- which driver features work and which don't
|
||||||
|
@ -66,17 +68,18 @@ Installation
|
||||||
------------
|
------------
|
||||||
|
|
||||||
If you are compiling this driver as included in the Linux kernel
|
If you are compiling this driver as included in the Linux kernel
|
||||||
sources, simply enable the CONFIG_THINKPAD_ACPI option, and optionally
|
sources, look for the CONFIG_THINKPAD_ACPI Kconfig option.
|
||||||
enable the CONFIG_THINKPAD_ACPI_BAY option if you want the
|
It is located on the menu path: "Device Drivers" -> "X86 Platform
|
||||||
thinkpad-specific bay functionality.
|
Specific Device Drivers" -> "ThinkPad ACPI Laptop Extras".
|
||||||
|
|
||||||
|
|
||||||
Features
|
Features
|
||||||
--------
|
--------
|
||||||
|
|
||||||
The driver exports two different interfaces to userspace, which can be
|
The driver exports two different interfaces to userspace, which can be
|
||||||
used to access the features it provides. One is a legacy procfs-based
|
used to access the features it provides. One is a legacy procfs-based
|
||||||
interface, which will be removed at some time in the distant future.
|
interface, which will be removed at some time in the future. The other
|
||||||
The other is a new sysfs-based interface which is not complete yet.
|
is a new sysfs-based interface which is not complete yet.
|
||||||
|
|
||||||
The procfs interface creates the /proc/acpi/ibm directory. There is a
|
The procfs interface creates the /proc/acpi/ibm directory. There is a
|
||||||
file under that directory for each feature it supports. The procfs
|
file under that directory for each feature it supports. The procfs
|
||||||
|
@ -111,15 +114,17 @@ The version of thinkpad-acpi's sysfs interface is exported by the driver
|
||||||
as a driver attribute (see below).
|
as a driver attribute (see below).
|
||||||
|
|
||||||
Sysfs driver attributes are on the driver's sysfs attribute space,
|
Sysfs driver attributes are on the driver's sysfs attribute space,
|
||||||
for 2.6.23 this is /sys/bus/platform/drivers/thinkpad_acpi/ and
|
for 2.6.23+ this is /sys/bus/platform/drivers/thinkpad_acpi/ and
|
||||||
/sys/bus/platform/drivers/thinkpad_hwmon/
|
/sys/bus/platform/drivers/thinkpad_hwmon/
|
||||||
|
|
||||||
Sysfs device attributes are on the thinkpad_acpi device sysfs attribute
|
Sysfs device attributes are on the thinkpad_acpi device sysfs attribute
|
||||||
space, for 2.6.23 this is /sys/devices/platform/thinkpad_acpi/.
|
space, for 2.6.23+ this is /sys/devices/platform/thinkpad_acpi/.
|
||||||
|
|
||||||
Sysfs device attributes for the sensors and fan are on the
|
Sysfs device attributes for the sensors and fan are on the
|
||||||
thinkpad_hwmon device's sysfs attribute space, but you should locate it
|
thinkpad_hwmon device's sysfs attribute space, but you should locate it
|
||||||
looking for a hwmon device with the name attribute of "thinkpad".
|
looking for a hwmon device with the name attribute of "thinkpad", or
|
||||||
|
better yet, through libsensors.
|
||||||
|
|
||||||
|
|
||||||
Driver version
|
Driver version
|
||||||
--------------
|
--------------
|
||||||
|
@ -129,6 +134,7 @@ sysfs driver attribute: version
|
||||||
|
|
||||||
The driver name and version. No commands can be written to this file.
|
The driver name and version. No commands can be written to this file.
|
||||||
|
|
||||||
|
|
||||||
Sysfs interface version
|
Sysfs interface version
|
||||||
-----------------------
|
-----------------------
|
||||||
|
|
||||||
|
@ -160,6 +166,7 @@ expect that an attribute might not be there, and deal with it properly
|
||||||
(an attribute not being there *is* a valid way to make it clear that a
|
(an attribute not being there *is* a valid way to make it clear that a
|
||||||
feature is not available in sysfs).
|
feature is not available in sysfs).
|
||||||
|
|
||||||
|
|
||||||
Hot keys
|
Hot keys
|
||||||
--------
|
--------
|
||||||
|
|
||||||
|
@ -172,17 +179,14 @@ system. Enabling the hotkey functionality of thinkpad-acpi signals the
|
||||||
firmware that such a driver is present, and modifies how the ThinkPad
|
firmware that such a driver is present, and modifies how the ThinkPad
|
||||||
firmware will behave in many situations.
|
firmware will behave in many situations.
|
||||||
|
|
||||||
The driver enables the hot key feature automatically when loaded. The
|
The driver enables the HKEY ("hot key") event reporting automatically
|
||||||
feature can later be disabled and enabled back at runtime. The driver
|
when loaded, and disables it when it is removed.
|
||||||
will also restore the hot key feature to its previous state and mask
|
|
||||||
when it is unloaded.
|
|
||||||
|
|
||||||
When the hotkey feature is enabled and the hot key mask is set (see
|
The driver will report HKEY events in the following format:
|
||||||
below), the driver will report HKEY events in the following format:
|
|
||||||
|
|
||||||
ibm/hotkey HKEY 00000080 0000xxxx
|
ibm/hotkey HKEY 00000080 0000xxxx
|
||||||
|
|
||||||
Some of these events refer to hot key presses, but not all.
|
Some of these events refer to hot key presses, but not all of them.
|
||||||
|
|
||||||
The driver will generate events over the input layer for hot keys and
|
The driver will generate events over the input layer for hot keys and
|
||||||
radio switches, and over the ACPI netlink layer for other events. The
|
radio switches, and over the ACPI netlink layer for other events. The
|
||||||
|
@ -214,13 +218,17 @@ procfs notes:
|
||||||
|
|
||||||
The following commands can be written to the /proc/acpi/ibm/hotkey file:
|
The following commands can be written to the /proc/acpi/ibm/hotkey file:
|
||||||
|
|
||||||
echo enable > /proc/acpi/ibm/hotkey -- enable the hot keys feature
|
|
||||||
echo disable > /proc/acpi/ibm/hotkey -- disable the hot keys feature
|
|
||||||
echo 0xffffffff > /proc/acpi/ibm/hotkey -- enable all hot keys
|
echo 0xffffffff > /proc/acpi/ibm/hotkey -- enable all hot keys
|
||||||
echo 0 > /proc/acpi/ibm/hotkey -- disable all possible hot keys
|
echo 0 > /proc/acpi/ibm/hotkey -- disable all possible hot keys
|
||||||
... any other 8-hex-digit mask ...
|
... any other 8-hex-digit mask ...
|
||||||
echo reset > /proc/acpi/ibm/hotkey -- restore the original mask
|
echo reset > /proc/acpi/ibm/hotkey -- restore the original mask
|
||||||
|
|
||||||
|
The following commands have been deprecated and will cause the kernel
|
||||||
|
to log a warning:
|
||||||
|
|
||||||
|
echo enable > /proc/acpi/ibm/hotkey -- does nothing
|
||||||
|
echo disable > /proc/acpi/ibm/hotkey -- returns an error
|
||||||
|
|
||||||
The procfs interface does not support NVRAM polling control. So as to
|
The procfs interface does not support NVRAM polling control. So as to
|
||||||
maintain maximum bug-to-bug compatibility, it does not report any masks,
|
maintain maximum bug-to-bug compatibility, it does not report any masks,
|
||||||
nor does it allow one to manipulate the hot key mask when the firmware
|
nor does it allow one to manipulate the hot key mask when the firmware
|
||||||
|
@ -229,12 +237,9 @@ does not support masks at all, even if NVRAM polling is in use.
|
||||||
sysfs notes:
|
sysfs notes:
|
||||||
|
|
||||||
hotkey_bios_enabled:
|
hotkey_bios_enabled:
|
||||||
Returns the status of the hot keys feature when
|
DEPRECATED, WILL BE REMOVED SOON.
|
||||||
thinkpad-acpi was loaded. Upon module unload, the hot
|
|
||||||
key feature status will be restored to this value.
|
|
||||||
|
|
||||||
0: hot keys were disabled
|
Returns 0.
|
||||||
1: hot keys were enabled (unusual)
|
|
||||||
|
|
||||||
hotkey_bios_mask:
|
hotkey_bios_mask:
|
||||||
Returns the hot keys mask when thinkpad-acpi was loaded.
|
Returns the hot keys mask when thinkpad-acpi was loaded.
|
||||||
|
@ -242,13 +247,10 @@ sysfs notes:
|
||||||
to this value.
|
to this value.
|
||||||
|
|
||||||
hotkey_enable:
|
hotkey_enable:
|
||||||
Enables/disables the hot keys feature in the ACPI
|
DEPRECATED, WILL BE REMOVED SOON.
|
||||||
firmware, and reports current status of the hot keys
|
|
||||||
feature. Has no effect on the NVRAM hot key polling
|
|
||||||
functionality.
|
|
||||||
|
|
||||||
0: disables the hot keys feature / feature disabled
|
0: returns -EPERM
|
||||||
1: enables the hot keys feature / feature enabled
|
1: does nothing
|
||||||
|
|
||||||
hotkey_mask:
|
hotkey_mask:
|
||||||
bit mask to enable driver-handling (and depending on
|
bit mask to enable driver-handling (and depending on
|
||||||
|
@ -618,6 +620,7 @@ For Lenovo models *with* ACPI backlight control:
|
||||||
and map them to KEY_BRIGHTNESS_UP and KEY_BRIGHTNESS_DOWN. Process
|
and map them to KEY_BRIGHTNESS_UP and KEY_BRIGHTNESS_DOWN. Process
|
||||||
these keys on userspace somehow (e.g. by calling xbacklight).
|
these keys on userspace somehow (e.g. by calling xbacklight).
|
||||||
|
|
||||||
|
|
||||||
Bluetooth
|
Bluetooth
|
||||||
---------
|
---------
|
||||||
|
|
||||||
|
@ -628,6 +631,9 @@ sysfs rfkill class: switch "tpacpi_bluetooth_sw"
|
||||||
This feature shows the presence and current state of a ThinkPad
|
This feature shows the presence and current state of a ThinkPad
|
||||||
Bluetooth device in the internal ThinkPad CDC slot.
|
Bluetooth device in the internal ThinkPad CDC slot.
|
||||||
|
|
||||||
|
If the ThinkPad supports it, the Bluetooth state is stored in NVRAM,
|
||||||
|
so it is kept across reboots and power-off.
|
||||||
|
|
||||||
Procfs notes:
|
Procfs notes:
|
||||||
|
|
||||||
If Bluetooth is installed, the following commands can be used:
|
If Bluetooth is installed, the following commands can be used:
|
||||||
|
@ -652,6 +658,7 @@ Sysfs notes:
|
||||||
rfkill controller switch "tpacpi_bluetooth_sw": refer to
|
rfkill controller switch "tpacpi_bluetooth_sw": refer to
|
||||||
Documentation/rfkill.txt for details.
|
Documentation/rfkill.txt for details.
|
||||||
|
|
||||||
|
|
||||||
Video output control -- /proc/acpi/ibm/video
|
Video output control -- /proc/acpi/ibm/video
|
||||||
--------------------------------------------
|
--------------------------------------------
|
||||||
|
|
||||||
|
@ -693,11 +700,8 @@ Fn-F7 from working. This also disables the video output switching
|
||||||
features of this driver, as it uses the same ACPI methods as
|
features of this driver, as it uses the same ACPI methods as
|
||||||
Fn-F7. Video switching on the console should still work.
|
Fn-F7. Video switching on the console should still work.
|
||||||
|
|
||||||
UPDATE: There's now a patch for the X.org Radeon driver which
|
UPDATE: refer to https://bugs.freedesktop.org/show_bug.cgi?id=2000
|
||||||
addresses this issue. Some people are reporting success with the patch
|
|
||||||
while others are still having problems. For more information:
|
|
||||||
|
|
||||||
https://bugs.freedesktop.org/show_bug.cgi?id=2000
|
|
||||||
|
|
||||||
ThinkLight control
|
ThinkLight control
|
||||||
------------------
|
------------------
|
||||||
|
@ -720,10 +724,11 @@ The ThinkLight sysfs interface is documented by the LED class
|
||||||
documentation, in Documentation/leds-class.txt. The ThinkLight LED name
|
documentation, in Documentation/leds-class.txt. The ThinkLight LED name
|
||||||
is "tpacpi::thinklight".
|
is "tpacpi::thinklight".
|
||||||
|
|
||||||
Due to limitations in the sysfs LED class, if the status of the thinklight
|
Due to limitations in the sysfs LED class, if the status of the ThinkLight
|
||||||
cannot be read or if it is unknown, thinkpad-acpi will report it as "off".
|
cannot be read or if it is unknown, thinkpad-acpi will report it as "off".
|
||||||
It is impossible to know if the status returned through sysfs is valid.
|
It is impossible to know if the status returned through sysfs is valid.
|
||||||
|
|
||||||
|
|
||||||
Docking / undocking -- /proc/acpi/ibm/dock
|
Docking / undocking -- /proc/acpi/ibm/dock
|
||||||
------------------------------------------
|
------------------------------------------
|
||||||
|
|
||||||
|
@ -784,6 +789,7 @@ the only docking stations currently supported are the X-series
|
||||||
UltraBase docks and "dumb" port replicators like the Mini Dock (the
|
UltraBase docks and "dumb" port replicators like the Mini Dock (the
|
||||||
latter don't need any ACPI support, actually).
|
latter don't need any ACPI support, actually).
|
||||||
|
|
||||||
|
|
||||||
UltraBay eject -- /proc/acpi/ibm/bay
|
UltraBay eject -- /proc/acpi/ibm/bay
|
||||||
------------------------------------
|
------------------------------------
|
||||||
|
|
||||||
|
@ -847,8 +853,9 @@ supported. Use "eject2" instead of "eject" for the second bay.
|
||||||
Note: the UltraBay eject support on the 600e/x, A22p and A3x is
|
Note: the UltraBay eject support on the 600e/x, A22p and A3x is
|
||||||
EXPERIMENTAL and may not work as expected. USE WITH CAUTION!
|
EXPERIMENTAL and may not work as expected. USE WITH CAUTION!
|
||||||
|
|
||||||
CMOS control
|
|
||||||
------------
|
CMOS/UCMS control
|
||||||
|
-----------------
|
||||||
|
|
||||||
procfs: /proc/acpi/ibm/cmos
|
procfs: /proc/acpi/ibm/cmos
|
||||||
sysfs device attribute: cmos_command
|
sysfs device attribute: cmos_command
|
||||||
|
@ -882,6 +889,7 @@ The cmos command interface is prone to firmware split-brain problems, as
|
||||||
in newer ThinkPads it is just a compatibility layer. Do not use it, it is
|
in newer ThinkPads it is just a compatibility layer. Do not use it, it is
|
||||||
exported just as a debug tool.
|
exported just as a debug tool.
|
||||||
|
|
||||||
|
|
||||||
LED control
|
LED control
|
||||||
-----------
|
-----------
|
||||||
|
|
||||||
|
@ -893,6 +901,17 @@ some older ThinkPad models, it is possible to query the status of the
|
||||||
LED indicators as well. Newer ThinkPads cannot query the real status
|
LED indicators as well. Newer ThinkPads cannot query the real status
|
||||||
of the LED indicators.
|
of the LED indicators.
|
||||||
|
|
||||||
|
Because misuse of the LEDs could induce an unaware user to perform
|
||||||
|
dangerous actions (like undocking or ejecting a bay device while the
|
||||||
|
buses are still active), or mask an important alarm (such as a nearly
|
||||||
|
empty battery, or a broken battery), access to most LEDs is
|
||||||
|
restricted.
|
||||||
|
|
||||||
|
Unrestricted access to all LEDs requires that thinkpad-acpi be
|
||||||
|
compiled with the CONFIG_THINKPAD_ACPI_UNSAFE_LEDS option enabled.
|
||||||
|
Distributions must never enable this option. Individual users that
|
||||||
|
are aware of the consequences are welcome to enable it.
|
||||||
|
|
||||||
procfs notes:
|
procfs notes:
|
||||||
|
|
||||||
The available commands are:
|
The available commands are:
|
||||||
|
@ -939,6 +958,7 @@ ThinkPad indicator LED should blink in hardware accelerated mode, use the
|
||||||
"timer" trigger, and leave the delay_on and delay_off parameters set to
|
"timer" trigger, and leave the delay_on and delay_off parameters set to
|
||||||
zero (to request hardware acceleration autodetection).
|
zero (to request hardware acceleration autodetection).
|
||||||
|
|
||||||
|
|
||||||
ACPI sounds -- /proc/acpi/ibm/beep
|
ACPI sounds -- /proc/acpi/ibm/beep
|
||||||
----------------------------------
|
----------------------------------
|
||||||
|
|
||||||
|
@ -968,6 +988,7 @@ X40:
|
||||||
16 - one medium-pitched beep repeating constantly, stop with 17
|
16 - one medium-pitched beep repeating constantly, stop with 17
|
||||||
17 - stop 16
|
17 - stop 16
|
||||||
|
|
||||||
|
|
||||||
Temperature sensors
|
Temperature sensors
|
||||||
-------------------
|
-------------------
|
||||||
|
|
||||||
|
@ -1115,6 +1136,7 @@ registers contain the current battery capacity, etc. If you experiment
|
||||||
with this, do send me your results (including some complete dumps with
|
with this, do send me your results (including some complete dumps with
|
||||||
a description of the conditions when they were taken.)
|
a description of the conditions when they were taken.)
|
||||||
|
|
||||||
|
|
||||||
LCD brightness control
|
LCD brightness control
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
|
@ -1124,10 +1146,9 @@ sysfs backlight device "thinkpad_screen"
|
||||||
This feature allows software control of the LCD brightness on ThinkPad
|
This feature allows software control of the LCD brightness on ThinkPad
|
||||||
models which don't have a hardware brightness slider.
|
models which don't have a hardware brightness slider.
|
||||||
|
|
||||||
It has some limitations: the LCD backlight cannot be actually turned on or
|
It has some limitations: the LCD backlight cannot be actually turned
|
||||||
off by this interface, and in many ThinkPad models, the "dim while on
|
on or off by this interface, it just controls the backlight brightness
|
||||||
battery" functionality will be enabled by the BIOS when this interface is
|
level.
|
||||||
used, and cannot be controlled.
|
|
||||||
|
|
||||||
On IBM (and some of the earlier Lenovo) ThinkPads, the backlight control
|
On IBM (and some of the earlier Lenovo) ThinkPads, the backlight control
|
||||||
has eight brightness levels, ranging from 0 to 7. Some of the levels
|
has eight brightness levels, ranging from 0 to 7. Some of the levels
|
||||||
|
@ -1136,10 +1157,15 @@ display backlight brightness control methods have 16 levels, ranging
|
||||||
from 0 to 15.
|
from 0 to 15.
|
||||||
|
|
||||||
There are two interfaces to the firmware for direct brightness control,
|
There are two interfaces to the firmware for direct brightness control,
|
||||||
EC and CMOS. To select which one should be used, use the
|
EC and UCMS (or CMOS). To select which one should be used, use the
|
||||||
brightness_mode module parameter: brightness_mode=1 selects EC mode,
|
brightness_mode module parameter: brightness_mode=1 selects EC mode,
|
||||||
brightness_mode=2 selects CMOS mode, brightness_mode=3 selects both EC
|
brightness_mode=2 selects UCMS mode, brightness_mode=3 selects EC
|
||||||
and CMOS. The driver tries to auto-detect which interface to use.
|
mode with NVRAM backing (so that brightness changes are remembered
|
||||||
|
across shutdown/reboot).
|
||||||
|
|
||||||
|
The driver tries to select which interface to use from a table of
|
||||||
|
defaults for each ThinkPad model. If it makes a wrong choice, please
|
||||||
|
report this as a bug, so that we can fix it.
|
||||||
|
|
||||||
When display backlight brightness controls are available through the
|
When display backlight brightness controls are available through the
|
||||||
standard ACPI interface, it is best to use it instead of this direct
|
standard ACPI interface, it is best to use it instead of this direct
|
||||||
|
@ -1201,6 +1227,7 @@ WARNING:
|
||||||
and maybe reduce the life of the backlight lamps by needlessly kicking
|
and maybe reduce the life of the backlight lamps by needlessly kicking
|
||||||
its level up and down at every change.
|
its level up and down at every change.
|
||||||
|
|
||||||
|
|
||||||
Volume control -- /proc/acpi/ibm/volume
|
Volume control -- /proc/acpi/ibm/volume
|
||||||
---------------------------------------
|
---------------------------------------
|
||||||
|
|
||||||
|
@ -1217,6 +1244,11 @@ distinct. To unmute the volume after the mute command, use either the
|
||||||
up or down command (the level command will not unmute the volume).
|
up or down command (the level command will not unmute the volume).
|
||||||
The current volume level and mute state is shown in the file.
|
The current volume level and mute state is shown in the file.
|
||||||
|
|
||||||
|
The ALSA mixer interface to this feature is still missing, but patches
|
||||||
|
to add it exist. That problem should be addressed in the not so
|
||||||
|
distant future.
|
||||||
|
|
||||||
|
|
||||||
Fan control and monitoring: fan speed, fan enable/disable
|
Fan control and monitoring: fan speed, fan enable/disable
|
||||||
---------------------------------------------------------
|
---------------------------------------------------------
|
||||||
|
|
||||||
|
@ -1383,8 +1415,11 @@ procfs: /proc/acpi/ibm/wan
|
||||||
sysfs device attribute: wwan_enable (deprecated)
|
sysfs device attribute: wwan_enable (deprecated)
|
||||||
sysfs rfkill class: switch "tpacpi_wwan_sw"
|
sysfs rfkill class: switch "tpacpi_wwan_sw"
|
||||||
|
|
||||||
This feature shows the presence and current state of a W-WAN (Sierra
|
This feature shows the presence and current state of the built-in
|
||||||
Wireless EV-DO) device.
|
Wireless WAN device.
|
||||||
|
|
||||||
|
If the ThinkPad supports it, the WWAN state is stored in NVRAM,
|
||||||
|
so it is kept across reboots and power-off.
|
||||||
|
|
||||||
It was tested on a Lenovo ThinkPad X60. It should probably work on other
|
It was tested on a Lenovo ThinkPad X60. It should probably work on other
|
||||||
ThinkPad models which come with this module installed.
|
ThinkPad models which come with this module installed.
|
||||||
|
@ -1413,6 +1448,7 @@ Sysfs notes:
|
||||||
rfkill controller switch "tpacpi_wwan_sw": refer to
|
rfkill controller switch "tpacpi_wwan_sw": refer to
|
||||||
Documentation/rfkill.txt for details.
|
Documentation/rfkill.txt for details.
|
||||||
|
|
||||||
|
|
||||||
EXPERIMENTAL: UWB
|
EXPERIMENTAL: UWB
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
@ -1431,6 +1467,7 @@ Sysfs notes:
|
||||||
rfkill controller switch "tpacpi_uwb_sw": refer to
|
rfkill controller switch "tpacpi_uwb_sw": refer to
|
||||||
Documentation/rfkill.txt for details.
|
Documentation/rfkill.txt for details.
|
||||||
|
|
||||||
|
|
||||||
Multiple Commands, Module Parameters
|
Multiple Commands, Module Parameters
|
||||||
------------------------------------
|
------------------------------------
|
||||||
|
|
||||||
|
@ -1445,6 +1482,7 @@ for example:
|
||||||
|
|
||||||
modprobe thinkpad_acpi hotkey=enable,0xffff video=auto_disable
|
modprobe thinkpad_acpi hotkey=enable,0xffff video=auto_disable
|
||||||
|
|
||||||
|
|
||||||
Enabling debugging output
|
Enabling debugging output
|
||||||
-------------------------
|
-------------------------
|
||||||
|
|
||||||
|
@ -1457,8 +1495,15 @@ will enable all debugging output classes. It takes a bitmask, so
|
||||||
to enable more than one output class, just add their values.
|
to enable more than one output class, just add their values.
|
||||||
|
|
||||||
Debug bitmask Description
|
Debug bitmask Description
|
||||||
|
0x8000 Disclose PID of userspace programs
|
||||||
|
accessing some functions of the driver
|
||||||
0x0001 Initialization and probing
|
0x0001 Initialization and probing
|
||||||
0x0002 Removal
|
0x0002 Removal
|
||||||
|
0x0004 RF Transmitter control (RFKILL)
|
||||||
|
(bluetooth, WWAN, UWB...)
|
||||||
|
0x0008 HKEY event interface, hotkeys
|
||||||
|
0x0010 Fan control
|
||||||
|
0x0020 Backlight brightness
|
||||||
|
|
||||||
There is also a kernel build option to enable more debugging
|
There is also a kernel build option to enable more debugging
|
||||||
information, which may be necessary to debug driver problems.
|
information, which may be necessary to debug driver problems.
|
||||||
|
@ -1467,6 +1512,7 @@ The level of debugging information output by the driver can be changed
|
||||||
at runtime through sysfs, using the driver attribute debug_level. The
|
at runtime through sysfs, using the driver attribute debug_level. The
|
||||||
attribute takes the same bitmask as the debug module parameter above.
|
attribute takes the same bitmask as the debug module parameter above.
|
||||||
|
|
||||||
|
|
||||||
Force loading of module
|
Force loading of module
|
||||||
-----------------------
|
-----------------------
|
||||||
|
|
||||||
|
@ -1505,3 +1551,7 @@ Sysfs interface changelog:
|
||||||
|
|
||||||
0x020200: Add poll()/select() support to the following attributes:
|
0x020200: Add poll()/select() support to the following attributes:
|
||||||
hotkey_radio_sw, wakeup_hotunplug_complete, wakeup_reason
|
hotkey_radio_sw, wakeup_hotunplug_complete, wakeup_reason
|
||||||
|
|
||||||
|
0x020300: hotkey enable/disable support removed, attributes
|
||||||
|
hotkey_bios_enabled and hotkey_enable deprecated and
|
||||||
|
marked for removal.
|
||||||
|
|
|
@ -164,15 +164,19 @@ All md devices contain:
|
||||||
raid_disks
|
raid_disks
|
||||||
a text file with a simple number indicating the number of devices
|
a text file with a simple number indicating the number of devices
|
||||||
in a fully functional array. If this is not yet known, the file
|
in a fully functional array. If this is not yet known, the file
|
||||||
will be empty. If an array is being resized (not currently
|
will be empty. If an array is being resized this will contain
|
||||||
possible) this will contain the larger of the old and new sizes.
|
the new number of devices.
|
||||||
Some raid level (RAID1) allow this value to be set while the
|
Some raid levels allow this value to be set while the array is
|
||||||
array is active. This will reconfigure the array. Otherwise
|
active. This will reconfigure the array. Otherwise it can only
|
||||||
it can only be set while assembling an array.
|
be set while assembling an array.
|
||||||
|
A change to this attribute will not be permitted if it would
|
||||||
|
reduce the size of the array. To reduce the number of drives
|
||||||
|
in an e.g. raid5, the array size must first be reduced by
|
||||||
|
setting the 'array_size' attribute.
|
||||||
|
|
||||||
chunk_size
|
chunk_size
|
||||||
This is the size if bytes for 'chunks' and is only relevant to
|
This is the size in bytes for 'chunks' and is only relevant to
|
||||||
raid levels that involve striping (1,4,5,6,10). The address space
|
raid levels that involve striping (0,4,5,6,10). The address space
|
||||||
of the array is conceptually divided into chunks and consecutive
|
of the array is conceptually divided into chunks and consecutive
|
||||||
chunks are striped onto neighbouring devices.
|
chunks are striped onto neighbouring devices.
|
||||||
The size should be at least PAGE_SIZE (4k) and should be a power
|
The size should be at least PAGE_SIZE (4k) and should be a power
|
||||||
|
@ -183,6 +187,20 @@ All md devices contain:
|
||||||
simply a number that is interpreted differently by different
|
simply a number that is interpreted differently by different
|
||||||
levels. It can be written while assembling an array.
|
levels. It can be written while assembling an array.
|
||||||
|
|
||||||
|
array_size
|
||||||
|
This can be used to artificially constrain the available space in
|
||||||
|
the array to be less than is actually available on the combined
|
||||||
|
devices. Writing a number (in Kilobytes) which is less than
|
||||||
|
the available size will set the size. Any reconfiguration of the
|
||||||
|
array (e.g. adding devices) will not cause the size to change.
|
||||||
|
Writing the word 'default' will cause the effective size of the
|
||||||
|
array to be whatever size is actually available based on
|
||||||
|
'level', 'chunk_size' and 'component_size'.
|
||||||
|
|
||||||
|
This can be used to reduce the size of the array before reducing
|
||||||
|
the number of devices in a raid4/5/6, or to support external
|
||||||
|
metadata formats which mandate such clipping.
|
||||||
|
|
||||||
reshape_position
|
reshape_position
|
||||||
This is either "none" or a sector number within the devices of
|
This is either "none" or a sector number within the devices of
|
||||||
the array where "reshape" is up to. If this is set, the three
|
the array where "reshape" is up to. If this is set, the three
|
||||||
|
@ -207,6 +225,11 @@ All md devices contain:
|
||||||
about the array. It can be 0.90 (traditional format), 1.0, 1.1,
|
about the array. It can be 0.90 (traditional format), 1.0, 1.1,
|
||||||
1.2 (newer format in varying locations) or "none" indicating that
|
1.2 (newer format in varying locations) or "none" indicating that
|
||||||
the kernel isn't managing metadata at all.
|
the kernel isn't managing metadata at all.
|
||||||
|
Alternately it can be "external:" followed by a string which
|
||||||
|
is set by user-space. This indicates that metadata is managed
|
||||||
|
by a user-space program. Any device failure or other event that
|
||||||
|
requires a metadata update will cause array activity to be
|
||||||
|
suspended until the event is acknowledged.
|
||||||
|
|
||||||
resync_start
|
resync_start
|
||||||
The point at which resync should start. If no resync is needed,
|
The point at which resync should start. If no resync is needed,
|
||||||
|
|
100
Documentation/networking/vxge.txt
Normal file
100
Documentation/networking/vxge.txt
Normal file
|
@ -0,0 +1,100 @@
|
||||||
|
Neterion's (Formerly S2io) X3100 Series 10GbE PCIe Server Adapter Linux driver
|
||||||
|
==============================================================================
|
||||||
|
|
||||||
|
Contents
|
||||||
|
--------
|
||||||
|
|
||||||
|
1) Introduction
|
||||||
|
2) Features supported
|
||||||
|
3) Configurable driver parameters
|
||||||
|
4) Troubleshooting
|
||||||
|
|
||||||
|
1) Introduction:
|
||||||
|
----------------
|
||||||
|
This Linux driver supports all Neterion's X3100 series 10 GbE PCIe I/O
|
||||||
|
Virtualized Server adapters.
|
||||||
|
The X3100 series supports four modes of operation, configurable via
|
||||||
|
firmware -
|
||||||
|
Single function mode
|
||||||
|
Multi function mode
|
||||||
|
SRIOV mode
|
||||||
|
MRIOV mode
|
||||||
|
The functions share a 10GbE link and the pci-e bus, but hardly anything else
|
||||||
|
inside the ASIC. Features like independent hw reset, statistics, bandwidth/
|
||||||
|
priority allocation and guarantees, GRO, TSO, interrupt moderation etc are
|
||||||
|
supported independently on each function.
|
||||||
|
|
||||||
|
(See below for a complete list of features supported for both IPv4 and IPv6)
|
||||||
|
|
||||||
|
2) Features supported:
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
i) Single function mode (up to 17 queues)
|
||||||
|
|
||||||
|
ii) Multi function mode (up to 17 functions)
|
||||||
|
|
||||||
|
iii) PCI-SIG's I/O Virtualization
|
||||||
|
- Single Root mode: v1.0 (up to 17 functions)
|
||||||
|
- Multi-Root mode: v1.0 (up to 17 functions)
|
||||||
|
|
||||||
|
iv) Jumbo frames
|
||||||
|
X3100 Series supports MTU up to 9600 bytes, modifiable using
|
||||||
|
ifconfig command.
|
||||||
|
|
||||||
|
v) Offloads supported: (Enabled by default)
|
||||||
|
Checksum offload (TCP/UDP/IP) on transmit and receive paths
|
||||||
|
TCP Segmentation Offload (TSO) on transmit path
|
||||||
|
Generic Receive Offload (GRO) on receive path
|
||||||
|
|
||||||
|
vi) MSI-X: (Enabled by default)
|
||||||
|
Resulting in noticeable performance improvement (up to 7% on certain
|
||||||
|
platforms).
|
||||||
|
|
||||||
|
vii) NAPI: (Enabled by default)
|
||||||
|
For better Rx interrupt moderation.
|
||||||
|
|
||||||
|
viii)RTH (Receive Traffic Hash): (Enabled by default)
|
||||||
|
Receive side steering for better scaling.
|
||||||
|
|
||||||
|
ix) Statistics
|
||||||
|
Comprehensive MAC-level and software statistics displayed using
|
||||||
|
"ethtool -S" option.
|
||||||
|
|
||||||
|
x) Multiple hardware queues: (Enabled by default)
|
||||||
|
Up to 17 hardware based transmit and receive data channels, with
|
||||||
|
multiple steering options (transmit multiqueue enabled by default).
|
||||||
|
|
||||||
|
3) Configurable driver parameters:
|
||||||
|
----------------------------------
|
||||||
|
|
||||||
|
i) max_config_dev
|
||||||
|
Specifies maximum device functions to be enabled.
|
||||||
|
Valid range: 1-8
|
||||||
|
|
||||||
|
ii) max_config_port
|
||||||
|
Specifies number of ports to be enabled.
|
||||||
|
Valid range: 1,2
|
||||||
|
Default: 1
|
||||||
|
|
||||||
|
iii)max_config_vpath
|
||||||
|
Specifies maximum VPATH(s) configured for each device function.
|
||||||
|
Valid range: 1-17
|
||||||
|
|
||||||
|
iv) vlan_tag_strip
|
||||||
|
Enables/disables vlan tag stripping from all received tagged frames that
|
||||||
|
are not replicated at the internal L2 switch.
|
||||||
|
Valid range: 0,1 (disabled, enabled respectively)
|
||||||
|
Default: 1
|
||||||
|
|
||||||
|
v) addr_learn_en
|
||||||
|
Enable learning the mac address of the guest OS interface in
|
||||||
|
virtualization environment.
|
||||||
|
Valid range: 0,1 (disabled, enabled respectively)
|
||||||
|
Default: 0
|
||||||
|
|
||||||
|
4) Troubleshooting:
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
To resolve an issue with the source code or X3100 series adapter, please collect
|
||||||
|
the statistics, register dumps using ethtool, relevant logs and email them to
|
||||||
|
support@neterion.com.
|
|
@ -1,6 +1,6 @@
|
||||||
* Uploaded QE firmware
|
* Uploaded QE firmware
|
||||||
|
|
||||||
If a new firwmare has been uploaded to the QE (usually by the
|
If a new firmware has been uploaded to the QE (usually by the
|
||||||
boot loader), then a 'firmware' child node should be added to the QE
|
boot loader), then a 'firmware' child node should be added to the QE
|
||||||
node. This node provides information on the uploaded firmware that
|
node. This node provides information on the uploaded firmware that
|
||||||
device drivers may need.
|
device drivers may need.
|
||||||
|
|
|
@ -5,9 +5,21 @@ Required properties:
|
||||||
- reg : should specify localbus chip select and size used for the chip.
|
- reg : should specify localbus chip select and size used for the chip.
|
||||||
- fsl,upm-addr-offset : UPM pattern offset for the address latch.
|
- fsl,upm-addr-offset : UPM pattern offset for the address latch.
|
||||||
- fsl,upm-cmd-offset : UPM pattern offset for the command latch.
|
- fsl,upm-cmd-offset : UPM pattern offset for the command latch.
|
||||||
- gpios : may specify optional GPIO connected to the Ready-Not-Busy pin.
|
|
||||||
|
|
||||||
Example:
|
Optional properties:
|
||||||
|
- fsl,upm-wait-flags : add chip-dependent short delays after running the
|
||||||
|
UPM pattern (0x1), after writing a data byte (0x2) or after
|
||||||
|
writing out a buffer (0x4).
|
||||||
|
- fsl,upm-addr-line-cs-offsets : address offsets for multi-chip support.
|
||||||
|
The corresponding address lines are used to select the chip.
|
||||||
|
- gpios : may specify optional GPIOs connected to the Ready-Not-Busy pins
|
||||||
|
(R/B#). For multi-chip devices, "n" GPIO definitions are required
|
||||||
|
according to the number of chips.
|
||||||
|
- chip-delay : chip dependent delay for transferring data from array to
|
||||||
|
read registers (tR). Required if property "gpios" is not used
|
||||||
|
(R/B# pins not connected).
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
|
||||||
upm@1,0 {
|
upm@1,0 {
|
||||||
compatible = "fsl,upm-nand";
|
compatible = "fsl,upm-nand";
|
||||||
|
@ -26,3 +38,26 @@ upm@1,0 {
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
upm@3,0 {
|
||||||
|
#address-cells = <0>;
|
||||||
|
#size-cells = <0>;
|
||||||
|
compatible = "tqc,tqm8548-upm-nand", "fsl,upm-nand";
|
||||||
|
reg = <3 0x0 0x800>;
|
||||||
|
fsl,upm-addr-offset = <0x10>;
|
||||||
|
fsl,upm-cmd-offset = <0x08>;
|
||||||
|
/* Multi-chip NAND device */
|
||||||
|
fsl,upm-addr-line-cs-offsets = <0x0 0x200>;
|
||||||
|
fsl,upm-wait-flags = <0x5>;
|
||||||
|
chip-delay = <25>; // in micro-seconds
|
||||||
|
|
||||||
|
nand@0 {
|
||||||
|
#address-cells = <1>;
|
||||||
|
#size-cells = <1>;
|
||||||
|
|
||||||
|
partition@0 {
|
||||||
|
label = "fs";
|
||||||
|
reg = <0x00000000 0x10000000>;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
|
@ -1,15 +1,43 @@
|
||||||
LED connected to GPIO
|
LEDs connected to GPIO lines
|
||||||
|
|
||||||
Required properties:
|
Required properties:
|
||||||
- compatible : should be "gpio-led".
|
- compatible : should be "gpio-leds".
|
||||||
- label : (optional) the label for this LED. If omitted, the label is
|
|
||||||
|
Each LED is represented as a sub-node of the gpio-leds device. Each
|
||||||
|
node's name represents the name of the corresponding LED.
|
||||||
|
|
||||||
|
LED sub-node properties:
|
||||||
|
- gpios : Should specify the LED's GPIO, see "Specifying GPIO information
|
||||||
|
for devices" in Documentation/powerpc/booting-without-of.txt. Active
|
||||||
|
low LEDs should be indicated using flags in the GPIO specifier.
|
||||||
|
- label : (optional) The label for this LED. If omitted, the label is
|
||||||
taken from the node name (excluding the unit address).
|
taken from the node name (excluding the unit address).
|
||||||
- gpios : should specify LED GPIO.
|
- linux,default-trigger : (optional) This parameter, if present, is a
|
||||||
|
string defining the trigger assigned to the LED. Current triggers are:
|
||||||
|
"backlight" - LED will act as a back-light, controlled by the framebuffer
|
||||||
|
system
|
||||||
|
"default-on" - LED will turn on
|
||||||
|
"heartbeat" - LED "double" flashes at a load average based rate
|
||||||
|
"ide-disk" - LED indicates disk activity
|
||||||
|
"timer" - LED flashes at a fixed, configurable rate
|
||||||
|
|
||||||
Example:
|
Examples:
|
||||||
|
|
||||||
led@0 {
|
leds {
|
||||||
compatible = "gpio-led";
|
compatible = "gpio-leds";
|
||||||
label = "hdd";
|
hdd {
|
||||||
gpios = <&mcu_pio 0 1>;
|
label = "IDE Activity";
|
||||||
|
gpios = <&mcu_pio 0 1>; /* Active low */
|
||||||
|
linux,default-trigger = "ide-disk";
|
||||||
};
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
run-control {
|
||||||
|
compatible = "gpio-leds";
|
||||||
|
red {
|
||||||
|
gpios = <&mpc8572 6 0>;
|
||||||
|
};
|
||||||
|
green {
|
||||||
|
gpios = <&mpc8572 7 0>;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
|
@ -126,7 +126,7 @@ This uses the /cgroup virtual file system and "/cgroup/<cgroup>/cpu.rt_runtime_u
|
||||||
to control the CPU time reserved for each control group instead.
|
to control the CPU time reserved for each control group instead.
|
||||||
|
|
||||||
For more information on working with control groups, you should read
|
For more information on working with control groups, you should read
|
||||||
Documentation/cgroups.txt as well.
|
Documentation/cgroups/cgroups.txt as well.
|
||||||
|
|
||||||
Group settings are checked against the following limits in order to keep the configuration
|
Group settings are checked against the following limits in order to keep the configuration
|
||||||
schedulable:
|
schedulable:
|
||||||
|
|
|
@ -60,17 +60,9 @@ Supported Cards/Chipsets
|
||||||
9005:0285:9005:02d5 Adaptec ASR-2405 (Voodoo40 Lite)
|
9005:0285:9005:02d5 Adaptec ASR-2405 (Voodoo40 Lite)
|
||||||
9005:0285:9005:02d6 Adaptec ASR-2445 (Voodoo44 Lite)
|
9005:0285:9005:02d6 Adaptec ASR-2445 (Voodoo44 Lite)
|
||||||
9005:0285:9005:02d7 Adaptec ASR-2805 (Voodoo80 Lite)
|
9005:0285:9005:02d7 Adaptec ASR-2805 (Voodoo80 Lite)
|
||||||
9005:0285:9005:02d8 Adaptec 5405G (Voodoo40 PM)
|
9005:0285:9005:02d8 Adaptec 5405Z (Voodoo40 BLBU)
|
||||||
9005:0285:9005:02d9 Adaptec 5445G (Voodoo44 PM)
|
9005:0285:9005:02d9 Adaptec 5445Z (Voodoo44 BLBU)
|
||||||
9005:0285:9005:02da Adaptec 5805G (Voodoo80 PM)
|
9005:0285:9005:02da Adaptec 5805Z (Voodoo80 BLBU)
|
||||||
9005:0285:9005:02db Adaptec 5085G (Voodoo08 PM)
|
|
||||||
9005:0285:9005:02dc Adaptec 51245G (Voodoo124 PM)
|
|
||||||
9005:0285:9005:02dd Adaptec 51645G (Voodoo164 PM)
|
|
||||||
9005:0285:9005:02de Adaptec 52445G (Voodoo244 PM)
|
|
||||||
9005:0285:9005:02df Adaptec ASR-2045G (Voodoo04 Lite PM)
|
|
||||||
9005:0285:9005:02e0 Adaptec ASR-2405G (Voodoo40 Lite PM)
|
|
||||||
9005:0285:9005:02e1 Adaptec ASR-2445G (Voodoo44 Lite PM)
|
|
||||||
9005:0285:9005:02e2 Adaptec ASR-2805G (Voodoo80 Lite PM)
|
|
||||||
1011:0046:9005:0364 Adaptec 5400S (Mustang)
|
1011:0046:9005:0364 Adaptec 5400S (Mustang)
|
||||||
1011:0046:9005:0365 Adaptec 5400S (Mustang)
|
1011:0046:9005:0365 Adaptec 5400S (Mustang)
|
||||||
9005:0287:9005:0800 Adaptec Themisto (Jupiter)
|
9005:0287:9005:0800 Adaptec Themisto (Jupiter)
|
||||||
|
@ -140,6 +132,7 @@ Deanna Bonds (non-DASD support, PAE fibs and 64 bit,
|
||||||
where fibs that go to the hardware are consistently called hw_fibs and
|
where fibs that go to the hardware are consistently called hw_fibs and
|
||||||
not just fibs like the name of the driver tracking structure)
|
not just fibs like the name of the driver tracking structure)
|
||||||
Mark Salyzyn <Mark_Salyzyn@adaptec.com> Fixed panic issues and added some new product ids for upcoming hbas. Performance tuning, card failover and bug mitigations.
|
Mark Salyzyn <Mark_Salyzyn@adaptec.com> Fixed panic issues and added some new product ids for upcoming hbas. Performance tuning, card failover and bug mitigations.
|
||||||
|
Achim Leubner <Achim_Leubner@adaptec.com>
|
||||||
|
|
||||||
Original Driver
|
Original Driver
|
||||||
-------------------------
|
-------------------------
|
||||||
|
|
174
Documentation/slow-work.txt
Normal file
174
Documentation/slow-work.txt
Normal file
|
@ -0,0 +1,174 @@
|
||||||
|
====================================
|
||||||
|
SLOW WORK ITEM EXECUTION THREAD POOL
|
||||||
|
====================================
|
||||||
|
|
||||||
|
By: David Howells <dhowells@redhat.com>
|
||||||
|
|
||||||
|
The slow work item execution thread pool is a pool of threads for performing
|
||||||
|
things that take a relatively long time, such as making mkdir calls.
|
||||||
|
Typically, when processing something, these items will spend a lot of time
|
||||||
|
blocking a thread on I/O, thus making that thread unavailable for doing other
|
||||||
|
work.
|
||||||
|
|
||||||
|
The standard workqueue model is unsuitable for this class of work item as that
|
||||||
|
limits the owner to a single thread or a single thread per CPU. For some
|
||||||
|
tasks, however, more threads - or fewer - are required.
|
||||||
|
|
||||||
|
There is just one pool per system. It contains no threads unless something
|
||||||
|
wants to use it - and that something must register its interest first. When
|
||||||
|
the pool is active, the number of threads it contains is dynamic, varying
|
||||||
|
between a maximum and minimum setting, depending on the load.
|
||||||
|
|
||||||
|
|
||||||
|
====================
|
||||||
|
CLASSES OF WORK ITEM
|
||||||
|
====================
|
||||||
|
|
||||||
|
This pool supports two classes of work items:
|
||||||
|
|
||||||
|
(*) Slow work items.
|
||||||
|
|
||||||
|
(*) Very slow work items.
|
||||||
|
|
||||||
|
The former are expected to finish much quicker than the latter.
|
||||||
|
|
||||||
|
An operation of the very slow class may do a batch combination of several
|
||||||
|
lookups, mkdirs, and a create for instance.
|
||||||
|
|
||||||
|
An operation of the ordinarily slow class may, for example, write stuff or
|
||||||
|
expand files, provided the time taken to do so isn't too long.
|
||||||
|
|
||||||
|
Operations of both types may sleep during execution, thus tying up the thread
|
||||||
|
loaned to them.
|
||||||
|
|
||||||
|
|
||||||
|
THREAD-TO-CLASS ALLOCATION
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
Not all the threads in the pool are available to work on very slow work items.
|
||||||
|
The number will be between one and one fewer than the number of active threads.
|
||||||
|
This is configurable (see the "Pool Configuration" section).
|
||||||
|
|
||||||
|
All the threads are available to work on ordinarily slow work items, but a
|
||||||
|
percentage of the threads will prefer to work on very slow work items.
|
||||||
|
|
||||||
|
The configuration ensures that at least one thread will be available to work on
|
||||||
|
very slow work items, and at least one thread will be available that won't work
|
||||||
|
on very slow work items at all.
|
||||||
|
|
||||||
|
|
||||||
|
=====================
|
||||||
|
USING SLOW WORK ITEMS
|
||||||
|
=====================
|
||||||
|
|
||||||
|
Firstly, a module or subsystem wanting to make use of slow work items must
|
||||||
|
register its interest:
|
||||||
|
|
||||||
|
int ret = slow_work_register_user();
|
||||||
|
|
||||||
|
This will return 0 if successful, or a -ve error upon failure.
|
||||||
|
|
||||||
|
|
||||||
|
Slow work items may then be set up by:
|
||||||
|
|
||||||
|
(1) Declaring a slow_work struct type variable:
|
||||||
|
|
||||||
|
#include <linux/slow-work.h>
|
||||||
|
|
||||||
|
struct slow_work myitem;
|
||||||
|
|
||||||
|
(2) Declaring the operations to be used for this item:
|
||||||
|
|
||||||
|
struct slow_work_ops myitem_ops = {
|
||||||
|
.get_ref = myitem_get_ref,
|
||||||
|
.put_ref = myitem_put_ref,
|
||||||
|
.execute = myitem_execute,
|
||||||
|
};
|
||||||
|
|
||||||
|
[*] For a description of the ops, see section "Item Operations".
|
||||||
|
|
||||||
|
(3) Initialising the item:
|
||||||
|
|
||||||
|
slow_work_init(&myitem, &myitem_ops);
|
||||||
|
|
||||||
|
or:
|
||||||
|
|
||||||
|
vslow_work_init(&myitem, &myitem_ops);
|
||||||
|
|
||||||
|
depending on its class.
|
||||||
|
|
||||||
|
A suitably set up work item can then be enqueued for processing:
|
||||||
|
|
||||||
|
int ret = slow_work_enqueue(&myitem);
|
||||||
|
|
||||||
|
This will return a -ve error if the thread pool is unable to gain a reference
|
||||||
|
on the item, 0 otherwise.
|
||||||
|
|
||||||
|
|
||||||
|
The items are reference counted, so there ought to be no need for a flush
|
||||||
|
operation. When all a module's slow work items have been processed, and the
|
||||||
|
module has no further interest in the facility, it should unregister its
|
||||||
|
interest:
|
||||||
|
|
||||||
|
slow_work_unregister_user();
|
||||||
|
|
||||||
|
|
||||||
|
===============
|
||||||
|
ITEM OPERATIONS
|
||||||
|
===============
|
||||||
|
|
||||||
|
Each work item requires a table of operations of type struct slow_work_ops.
|
||||||
|
All members are required:
|
||||||
|
|
||||||
|
(*) Get a reference on an item:
|
||||||
|
|
||||||
|
int (*get_ref)(struct slow_work *work);
|
||||||
|
|
||||||
|
This allows the thread pool to attempt to pin an item by getting a
|
||||||
|
reference on it. This function should return 0 if the reference was
|
||||||
|
granted, or a -ve error otherwise. If an error is returned,
|
||||||
|
slow_work_enqueue() will fail.
|
||||||
|
|
||||||
|
The reference is held whilst the item is queued and whilst it is being
|
||||||
|
executed. The item may then be requeued with the same reference held, or
|
||||||
|
the reference will be released.
|
||||||
|
|
||||||
|
(*) Release a reference on an item:
|
||||||
|
|
||||||
|
void (*put_ref)(struct slow_work *work);
|
||||||
|
|
||||||
|
This allows the thread pool to unpin an item by releasing the reference on
|
||||||
|
it. The thread pool will not touch the item again once this has been
|
||||||
|
called.
|
||||||
|
|
||||||
|
(*) Execute an item:
|
||||||
|
|
||||||
|
void (*execute)(struct slow_work *work);
|
||||||
|
|
||||||
|
This should perform the work required of the item. It may sleep, it may
|
||||||
|
perform disk I/O and it may wait for locks.
|
||||||
|
|
||||||
|
|
||||||
|
==================
|
||||||
|
POOL CONFIGURATION
|
||||||
|
==================
|
||||||
|
|
||||||
|
The slow-work thread pool has a number of configurables:
|
||||||
|
|
||||||
|
(*) /proc/sys/kernel/slow-work/min-threads
|
||||||
|
|
||||||
|
The minimum number of threads that should be in the pool whilst it is in
|
||||||
|
use. This may be anywhere between 2 and max-threads.
|
||||||
|
|
||||||
|
(*) /proc/sys/kernel/slow-work/max-threads
|
||||||
|
|
||||||
|
The maximum number of threads that should be in the pool. This may be
|
||||||
|
anywhere between min-threads and 255 or NR_CPUS * 2, whichever is greater.
|
||||||
|
|
||||||
|
(*) /proc/sys/kernel/slow-work/vslow-percentage
|
||||||
|
|
||||||
|
The percentage of active threads in the pool that may be used to execute
|
||||||
|
very slow work items. This may be between 1 and 99. The resultant number
|
||||||
|
is bounded to between 1 and one fewer than the number of active threads.
|
||||||
|
This ensures there is always at least one thread that can process very
|
||||||
|
slow work items, and always at least one thread that won't.
|
71
Documentation/sound/alsa/soc/jack.txt
Normal file
71
Documentation/sound/alsa/soc/jack.txt
Normal file
|
@ -0,0 +1,71 @@
|
||||||
|
ASoC jack detection
|
||||||
|
===================
|
||||||
|
|
||||||
|
ALSA has a standard API for representing physical jacks to user space,
|
||||||
|
the kernel side of which can be seen in include/sound/jack.h. ASoC
|
||||||
|
provides a version of this API adding two additional features:
|
||||||
|
|
||||||
|
- It allows more than one jack detection method to work together on one
|
||||||
|
user visible jack. In embedded systems it is common for multiple
|
||||||
|
detection methods to be present on a single jack but handled by separate bits of
|
||||||
|
hardware.
|
||||||
|
|
||||||
|
- Integration with DAPM, allowing DAPM endpoints to be updated
|
||||||
|
automatically based on the detected jack status (eg, turning off the
|
||||||
|
headphone outputs if no headphones are present).
|
||||||
|
|
||||||
|
This is done by splitting the jacks up into three things working
|
||||||
|
together: the jack itself represented by a struct snd_soc_jack, sets of
|
||||||
|
snd_soc_jack_pins representing DAPM endpoints to update and blocks of
|
||||||
|
code providing jack reporting mechanisms.
|
||||||
|
|
||||||
|
For example, a system may have a stereo headset jack with two reporting
|
||||||
|
mechanisms, one for the headphone and one for the microphone. Some
|
||||||
|
systems won't be able to use their speaker output while a headphone is
|
||||||
|
connected and so will want to make sure to update both speaker and
|
||||||
|
headphone when the headphone jack status changes.
|
||||||
|
|
||||||
|
The jack - struct snd_soc_jack
|
||||||
|
==============================
|
||||||
|
|
||||||
|
This represents a physical jack on the system and is what is visible to
|
||||||
|
user space. The jack itself is completely passive, it is set up by the
|
||||||
|
machine driver and updated by jack detection methods.
|
||||||
|
|
||||||
|
Jacks are created by the machine driver calling snd_soc_jack_new().
|
||||||
|
|
||||||
|
snd_soc_jack_pin
|
||||||
|
================
|
||||||
|
|
||||||
|
These represent a DAPM pin to update depending on some of the status
|
||||||
|
bits supported by the jack. Each snd_soc_jack has zero or more of these
|
||||||
|
which are updated automatically. They are created by the machine driver
|
||||||
|
and associated with the jack using snd_soc_jack_add_pins(). The status
|
||||||
|
of the endpoint may be configured to be the opposite of the jack status if
|
||||||
|
required (eg, enabling a built in microphone if a microphone is not
|
||||||
|
connected via a jack).
|
||||||
|
|
||||||
|
Jack detection methods
|
||||||
|
======================
|
||||||
|
|
||||||
|
Actual jack detection is done by code which is able to monitor some
|
||||||
|
input to the system and update a jack by calling snd_soc_jack_report(),
|
||||||
|
specifying a subset of bits to update. The jack detection code should
|
||||||
|
be set up by the machine driver, taking configuration for the jack to
|
||||||
|
update and the set of things to report when the jack is connected.
|
||||||
|
|
||||||
|
Often this is done based on the status of a GPIO - a handler for this is
|
||||||
|
provided by the snd_soc_jack_add_gpio() function. Other methods are
|
||||||
|
also available, for example integrated into CODECs. One example of
|
||||||
|
CODEC integrated jack detection can be seen in the WM8350 driver.
|
||||||
|
|
||||||
|
Each jack may have multiple reporting mechanisms, though it will need at
|
||||||
|
least one to be useful.
|
||||||
|
|
||||||
|
Machine drivers
|
||||||
|
===============
|
||||||
|
|
||||||
|
These are all hooked together by the machine driver depending on the
|
||||||
|
system hardware. The machine driver will set up the snd_soc_jack and
|
||||||
|
the list of pins to update then set up one or more jack detection
|
||||||
|
mechanisms to update that jack based on their current status.
|
|
@ -10,6 +10,8 @@ fs.txt
|
||||||
- documentation for /proc/sys/fs/*.
|
- documentation for /proc/sys/fs/*.
|
||||||
kernel.txt
|
kernel.txt
|
||||||
- documentation for /proc/sys/kernel/*.
|
- documentation for /proc/sys/kernel/*.
|
||||||
|
net.txt
|
||||||
|
- documentation for /proc/sys/net/*.
|
||||||
sunrpc.txt
|
sunrpc.txt
|
||||||
- documentation for /proc/sys/sunrpc/*.
|
- documentation for /proc/sys/sunrpc/*.
|
||||||
vm.txt
|
vm.txt
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
Documentation for /proc/sys/fs/* kernel version 2.2.10
|
Documentation for /proc/sys/fs/* kernel version 2.2.10
|
||||||
(c) 1998, 1999, Rik van Riel <riel@nl.linux.org>
|
(c) 1998, 1999, Rik van Riel <riel@nl.linux.org>
|
||||||
|
(c) 2009, Shen Feng <shen@cn.fujitsu.com>
|
||||||
|
|
||||||
For general info and legal blurb, please look in README.
|
For general info and legal blurb, please look in README.
|
||||||
|
|
||||||
|
@ -14,7 +15,12 @@ kernel. Since some of the files _can_ be used to screw up your
|
||||||
system, it is advisable to read both documentation and source
|
system, it is advisable to read both documentation and source
|
||||||
before actually making adjustments.
|
before actually making adjustments.
|
||||||
|
|
||||||
|
1. /proc/sys/fs
|
||||||
|
----------------------------------------------------------
|
||||||
|
|
||||||
Currently, these files are in /proc/sys/fs:
|
Currently, these files are in /proc/sys/fs:
|
||||||
|
- aio-max-nr
|
||||||
|
- aio-nr
|
||||||
- dentry-state
|
- dentry-state
|
||||||
- dquot-max
|
- dquot-max
|
||||||
- dquot-nr
|
- dquot-nr
|
||||||
|
@ -30,8 +36,15 @@ Currently, these files are in /proc/sys/fs:
|
||||||
- super-max
|
- super-max
|
||||||
- super-nr
|
- super-nr
|
||||||
|
|
||||||
Documentation for the files in /proc/sys/fs/binfmt_misc is
|
==============================================================
|
||||||
in Documentation/binfmt_misc.txt.
|
|
||||||
|
aio-nr & aio-max-nr:
|
||||||
|
|
||||||
|
aio-nr is the running total of the number of events specified on the
|
||||||
|
io_setup system call for all currently active aio contexts. If aio-nr
|
||||||
|
reaches aio-max-nr then io_setup will fail with EAGAIN. Note that
|
||||||
|
raising aio-max-nr does not result in the pre-allocation or re-sizing
|
||||||
|
of any kernel data structures.
|
||||||
|
|
||||||
==============================================================
|
==============================================================
|
||||||
|
|
||||||
|
@ -178,3 +191,60 @@ requests. aio-max-nr allows you to change the maximum value
|
||||||
aio-nr can grow to.
|
aio-nr can grow to.
|
||||||
|
|
||||||
==============================================================
|
==============================================================
|
||||||
|
|
||||||
|
|
||||||
|
2. /proc/sys/fs/binfmt_misc
|
||||||
|
----------------------------------------------------------
|
||||||
|
|
||||||
|
Documentation for the files in /proc/sys/fs/binfmt_misc is
|
||||||
|
in Documentation/binfmt_misc.txt.
|
||||||
|
|
||||||
|
|
||||||
|
3. /proc/sys/fs/mqueue - POSIX message queues filesystem
|
||||||
|
----------------------------------------------------------
|
||||||
|
|
||||||
|
The "mqueue" filesystem provides the necessary kernel features to enable the
|
||||||
|
creation of a user space library that implements the POSIX message queues
|
||||||
|
API (as noted by the MSG tag in the POSIX 1003.1-2001 version of the System
|
||||||
|
Interfaces specification.)
|
||||||
|
|
||||||
|
The "mqueue" filesystem contains values for determining/setting the amount of
|
||||||
|
resources used by the file system.
|
||||||
|
|
||||||
|
/proc/sys/fs/mqueue/queues_max is a read/write file for setting/getting the
|
||||||
|
maximum number of message queues allowed on the system.
|
||||||
|
|
||||||
|
/proc/sys/fs/mqueue/msg_max is a read/write file for setting/getting the
|
||||||
|
maximum number of messages in a queue value. In fact it is the limiting value
|
||||||
|
for another (user) limit which is set in mq_open invocation. This attribute of
|
||||||
|
a queue must be less than or equal to msg_max.
|
||||||
|
|
||||||
|
/proc/sys/fs/mqueue/msgsize_max is a read/write file for setting/getting the
|
||||||
|
maximum message size value (it is every message queue's attribute set during
|
||||||
|
its creation).
|
||||||
|
|
||||||
|
|
||||||
|
4. /proc/sys/fs/epoll - Configuration options for the epoll interface
|
||||||
|
--------------------------------------------------------
|
||||||
|
|
||||||
|
This directory contains configuration options for the epoll(7) interface.
|
||||||
|
|
||||||
|
max_user_instances
|
||||||
|
------------------
|
||||||
|
|
||||||
|
This is the maximum number of epoll file descriptors that a single user can
|
||||||
|
have open at a given time. The default value is 128, and should be enough
|
||||||
|
for normal users.
|
||||||
|
|
||||||
|
max_user_watches
|
||||||
|
----------------
|
||||||
|
|
||||||
|
Every epoll file descriptor can store a number of files to be monitored
|
||||||
|
for event readiness. Each one of these monitored files constitutes a "watch".
|
||||||
|
This configuration option sets the maximum number of "watches" that are
|
||||||
|
allowed for each user.
|
||||||
|
Each "watch" costs roughly 90 bytes on a 32bit kernel, and roughly 160 bytes
|
||||||
|
on a 64bit one.
|
||||||
|
The current default value for max_user_watches is 1/32 of the available
|
||||||
|
low memory, divided by the "watch" cost in bytes.
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
Documentation for /proc/sys/kernel/* kernel version 2.2.10
|
Documentation for /proc/sys/kernel/* kernel version 2.2.10
|
||||||
(c) 1998, 1999, Rik van Riel <riel@nl.linux.org>
|
(c) 1998, 1999, Rik van Riel <riel@nl.linux.org>
|
||||||
|
(c) 2009, Shen Feng <shen@cn.fujitsu.com>
|
||||||
|
|
||||||
For general info and legal blurb, please look in README.
|
For general info and legal blurb, please look in README.
|
||||||
|
|
||||||
|
@ -18,6 +19,7 @@ Currently, these files might (depending on your configuration)
|
||||||
show up in /proc/sys/kernel:
|
show up in /proc/sys/kernel:
|
||||||
- acpi_video_flags
|
- acpi_video_flags
|
||||||
- acct
|
- acct
|
||||||
|
- auto_msgmni
|
||||||
- core_pattern
|
- core_pattern
|
||||||
- core_uses_pid
|
- core_uses_pid
|
||||||
- ctrl-alt-del
|
- ctrl-alt-del
|
||||||
|
@ -33,6 +35,7 @@ show up in /proc/sys/kernel:
|
||||||
- msgmax
|
- msgmax
|
||||||
- msgmnb
|
- msgmnb
|
||||||
- msgmni
|
- msgmni
|
||||||
|
- nmi_watchdog
|
||||||
- osrelease
|
- osrelease
|
||||||
- ostype
|
- ostype
|
||||||
- overflowgid
|
- overflowgid
|
||||||
|
@ -40,6 +43,7 @@ show up in /proc/sys/kernel:
|
||||||
- panic
|
- panic
|
||||||
- pid_max
|
- pid_max
|
||||||
- powersave-nap [ PPC only ]
|
- powersave-nap [ PPC only ]
|
||||||
|
- panic_on_unrecovered_nmi
|
||||||
- printk
|
- printk
|
||||||
- randomize_va_space
|
- randomize_va_space
|
||||||
- real-root-dev ==> Documentation/initrd.txt
|
- real-root-dev ==> Documentation/initrd.txt
|
||||||
|
@ -55,6 +59,7 @@ show up in /proc/sys/kernel:
|
||||||
- sysrq ==> Documentation/sysrq.txt
|
- sysrq ==> Documentation/sysrq.txt
|
||||||
- tainted
|
- tainted
|
||||||
- threads-max
|
- threads-max
|
||||||
|
- unknown_nmi_panic
|
||||||
- version
|
- version
|
||||||
|
|
||||||
==============================================================
|
==============================================================
|
||||||
|
@ -381,3 +386,51 @@ can be ORed together:
|
||||||
512 - A kernel warning has occurred.
|
512 - A kernel warning has occurred.
|
||||||
1024 - A module from drivers/staging was loaded.
|
1024 - A module from drivers/staging was loaded.
|
||||||
|
|
||||||
|
==============================================================
|
||||||
|
|
||||||
|
auto_msgmni:
|
||||||
|
|
||||||
|
Enables/Disables automatic recomputing of msgmni upon memory add/remove or
|
||||||
|
upon ipc namespace creation/removal (see the msgmni description above).
|
||||||
|
Echoing "1" into this file enables msgmni automatic recomputing.
|
||||||
|
Echoing "0" turns it off.
|
||||||
|
auto_msgmni default value is 1.
|
||||||
|
|
||||||
|
==============================================================
|
||||||
|
|
||||||
|
nmi_watchdog:
|
||||||
|
|
||||||
|
Enables/Disables the NMI watchdog on x86 systems. When the value is non-zero
|
||||||
|
the NMI watchdog is enabled and will continuously test all online cpus to
|
||||||
|
determine whether or not they are still functioning properly. Currently,
|
||||||
|
passing "nmi_watchdog=" parameter at boot time is required for this function
|
||||||
|
to work.
|
||||||
|
|
||||||
|
If LAPIC NMI watchdog method is in use (nmi_watchdog=2 kernel parameter), the
|
||||||
|
NMI watchdog shares registers with oprofile. By disabling the NMI watchdog,
|
||||||
|
oprofile may have more registers to utilize.
|
||||||
|
|
||||||
|
==============================================================
|
||||||
|
|
||||||
|
unknown_nmi_panic:
|
||||||
|
|
||||||
|
The value in this file affects behavior of handling NMI. When the value is
|
||||||
|
non-zero, unknown NMI is trapped and then panic occurs. At that time, kernel
|
||||||
|
debugging information is displayed on console.
|
||||||
|
|
||||||
|
For example, the NMI switch that most IA32 servers have raises an unknown NMI.
|
||||||
|
If a system hangs up, try pressing the NMI switch.
|
||||||
|
|
||||||
|
==============================================================
|
||||||
|
|
||||||
|
panic_on_unrecovered_nmi:
|
||||||
|
|
||||||
|
The default Linux behaviour on an NMI of either memory or unknown is to continue
|
||||||
|
operation. For many environments such as scientific computing it is preferable
|
||||||
|
that the box is taken out and the error dealt with than an uncorrected
|
||||||
|
parity/ECC error get propagated.
|
||||||
|
|
||||||
|
A small number of systems do generate NMI's for bizarre random reasons such as
|
||||||
|
power management so the default is off. That sysctl works like the existing
|
||||||
|
panic controls already in that directory.
|
||||||
|
|
||||||
|
|
175
Documentation/sysctl/net.txt
Normal file
175
Documentation/sysctl/net.txt
Normal file
|
@ -0,0 +1,175 @@
|
||||||
|
Documentation for /proc/sys/net/* kernel version 2.4.0-test11-pre4
|
||||||
|
(c) 1999 Terrehon Bowden <terrehon@pacbell.net>
|
||||||
|
Bodo Bauer <bb@ricochet.net>
|
||||||
|
(c) 2000 Jorge Nerin <comandante@zaralinux.com>
|
||||||
|
(c) 2009 Shen Feng <shen@cn.fujitsu.com>
|
||||||
|
|
||||||
|
For general info and legal blurb, please look in README.
|
||||||
|
|
||||||
|
==============================================================
|
||||||
|
|
||||||
|
This file contains the documentation for the sysctl files in
|
||||||
|
/proc/sys/net and is valid for Linux kernel version 2.4.0-test11-pre4.
|
||||||
|
|
||||||
|
The interface to the networking parts of the kernel is located in
|
||||||
|
/proc/sys/net. The following table shows all possible subdirectories. You may
|
||||||
|
see only some of them, depending on your kernel's configuration.
|
||||||
|
|
||||||
|
|
||||||
|
Table : Subdirectories in /proc/sys/net
|
||||||
|
..............................................................................
|
||||||
|
Directory Content Directory Content
|
||||||
|
core General parameter appletalk Appletalk protocol
|
||||||
|
unix Unix domain sockets netrom NET/ROM
|
||||||
|
802 E802 protocol ax25 AX25
|
||||||
|
ethernet Ethernet protocol rose X.25 PLP layer
|
||||||
|
ipv4 IP version 4 x25 X.25 protocol
|
||||||
|
ipx IPX token-ring IBM token ring
|
||||||
|
bridge Bridging decnet DEC net
|
||||||
|
ipv6 IP version 6
|
||||||
|
..............................................................................
|
||||||
|
|
||||||
|
1. /proc/sys/net/core - Network core options
|
||||||
|
-------------------------------------------------------
|
||||||
|
|
||||||
|
rmem_default
|
||||||
|
------------
|
||||||
|
|
||||||
|
The default setting of the socket receive buffer in bytes.
|
||||||
|
|
||||||
|
rmem_max
|
||||||
|
--------
|
||||||
|
|
||||||
|
The maximum receive socket buffer size in bytes.
|
||||||
|
|
||||||
|
wmem_default
|
||||||
|
------------
|
||||||
|
|
||||||
|
The default setting (in bytes) of the socket send buffer.
|
||||||
|
|
||||||
|
wmem_max
|
||||||
|
--------
|
||||||
|
|
||||||
|
The maximum send socket buffer size in bytes.
|
||||||
|
|
||||||
|
message_burst and message_cost
|
||||||
|
------------------------------
|
||||||
|
|
||||||
|
These parameters are used to limit the warning messages written to the kernel
|
||||||
|
log from the networking code. They enforce a rate limit to make a
|
||||||
|
denial-of-service attack impossible. A higher message_cost factor results in
|
||||||
|
fewer messages that will be written. Message_burst controls when messages will
|
||||||
|
be dropped. The default settings limit warning messages to one every five
|
||||||
|
seconds.
|
||||||
|
|
||||||
|
warnings
|
||||||
|
--------
|
||||||
|
|
||||||
|
This controls console messages from the networking stack that can occur because
|
||||||
|
of problems on the network like duplicate address or bad checksums. Normally,
|
||||||
|
this should be enabled, but if the problem persists the messages can be
|
||||||
|
disabled.
|
||||||
|
|
||||||
|
netdev_budget
|
||||||
|
-------------
|
||||||
|
|
||||||
|
Maximum number of packets taken from all interfaces in one polling cycle (NAPI
|
||||||
|
poll). In one polling cycle interfaces which are registered to polling are
|
||||||
|
probed in a round-robin manner. The limit of packets in one such probe can be
|
||||||
|
set per-device via sysfs class/net/<device>/weight .
|
||||||
|
|
||||||
|
netdev_max_backlog
|
||||||
|
------------------
|
||||||
|
|
||||||
|
Maximum number of packets, queued on the INPUT side, when the interface
|
||||||
|
receives packets faster than kernel can process them.
|
||||||
|
|
||||||
|
optmem_max
|
||||||
|
----------
|
||||||
|
|
||||||
|
Maximum ancillary buffer size allowed per socket. Ancillary data is a sequence
|
||||||
|
of struct cmsghdr structures with appended data.
|
||||||
|
|
||||||
|
2. /proc/sys/net/unix - Parameters for Unix domain sockets
|
||||||
|
-------------------------------------------------------
|
||||||
|
|
||||||
|
There is only one file in this directory.
|
||||||
|
unix_dgram_qlen limits the max number of datagrams queued in Unix domain
|
||||||
|
socket's buffer. It will not take effect unless PF_UNIX flag is specified.
|
||||||
|
|
||||||
|
|
||||||
|
3. /proc/sys/net/ipv4 - IPV4 settings
|
||||||
|
-------------------------------------------------------
|
||||||
|
Please see: Documentation/networking/ip-sysctl.txt and ipvs-sysctl.txt for
|
||||||
|
descriptions of these entries.
|
||||||
|
|
||||||
|
|
||||||
|
4. Appletalk
|
||||||
|
-------------------------------------------------------
|
||||||
|
|
||||||
|
The /proc/sys/net/appletalk directory holds the Appletalk configuration data
|
||||||
|
when Appletalk is loaded. The configurable parameters are:
|
||||||
|
|
||||||
|
aarp-expiry-time
|
||||||
|
----------------
|
||||||
|
|
||||||
|
The amount of time we keep an ARP entry before expiring it. Used to age out
|
||||||
|
old hosts.
|
||||||
|
|
||||||
|
aarp-resolve-time
|
||||||
|
-----------------
|
||||||
|
|
||||||
|
The amount of time we will spend trying to resolve an Appletalk address.
|
||||||
|
|
||||||
|
aarp-retransmit-limit
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
The number of times we will retransmit a query before giving up.
|
||||||
|
|
||||||
|
aarp-tick-time
|
||||||
|
--------------
|
||||||
|
|
||||||
|
Controls the rate at which expires are checked.
|
||||||
|
|
||||||
|
The directory /proc/net/appletalk holds the list of active Appletalk sockets
|
||||||
|
on a machine.
|
||||||
|
|
||||||
|
The fields indicate the DDP type, the local address (in network:node format)
|
||||||
|
the remote address, the size of the transmit pending queue, the size of the
|
||||||
|
received queue (bytes waiting for applications to read) the state and the uid
|
||||||
|
owning the socket.
|
||||||
|
|
||||||
|
/proc/net/atalk_iface lists all the interfaces configured for appletalk. It
|
||||||
|
shows the name of the interface, its Appletalk address, the network range on
|
||||||
|
that address (or network number for phase 1 networks), and the status of the
|
||||||
|
interface.
|
||||||
|
|
||||||
|
/proc/net/atalk_route lists each known network route. It lists the target
|
||||||
|
(network) that the route leads to, the router (may be directly connected), the
|
||||||
|
route flags, and the device the route is using.
|
||||||
|
|
||||||
|
|
||||||
|
5. IPX
|
||||||
|
-------------------------------------------------------
|
||||||
|
|
||||||
|
The IPX protocol has no tunable values in /proc/sys/net.
|
||||||
|
|
||||||
|
The IPX protocol does, however, provide /proc/net/ipx. This lists each IPX
|
||||||
|
socket giving the local and remote addresses in Novell format (that is
|
||||||
|
network:node:port). In accordance with the strange Novell tradition,
|
||||||
|
everything but the port is in hex. Not_Connected is displayed for sockets that
|
||||||
|
are not tied to a specific remote address. The Tx and Rx queue sizes indicate
|
||||||
|
the number of bytes pending for transmission and reception. The state
|
||||||
|
indicates the state the socket is in and the uid is the owning uid of the
|
||||||
|
socket.
|
||||||
|
|
||||||
|
The /proc/net/ipx_interface file lists all IPX interfaces. For each interface
|
||||||
|
it gives the network number, the node number, and indicates if the network is
|
||||||
|
the primary network. It also indicates which device it is bound to (or
|
||||||
|
Internal for internal networks) and the Frame Type if appropriate. Linux
|
||||||
|
supports 802.3, 802.2, 802.2 SNAP and DIX (Blue Book) ethernet framing for
|
||||||
|
IPX.
|
||||||
|
|
||||||
|
The /proc/net/ipx_route table holds a list of IPX routes. For each route it
|
||||||
|
gives the destination network, the router node (or Directly) and the network
|
||||||
|
address of the router (or Connected) for internal networks.
|
|
@ -39,6 +39,8 @@ Currently, these files are in /proc/sys/vm:
|
||||||
- nr_hugepages
|
- nr_hugepages
|
||||||
- nr_overcommit_hugepages
|
- nr_overcommit_hugepages
|
||||||
- nr_pdflush_threads
|
- nr_pdflush_threads
|
||||||
|
- nr_pdflush_threads_min
|
||||||
|
- nr_pdflush_threads_max
|
||||||
- nr_trim_pages (only if CONFIG_MMU=n)
|
- nr_trim_pages (only if CONFIG_MMU=n)
|
||||||
- numa_zonelist_order
|
- numa_zonelist_order
|
||||||
- oom_dump_tasks
|
- oom_dump_tasks
|
||||||
|
@ -463,6 +465,32 @@ The default value is 0.
|
||||||
|
|
||||||
==============================================================
|
==============================================================
|
||||||
|
|
||||||
|
nr_pdflush_threads_min
|
||||||
|
|
||||||
|
This value controls the minimum number of pdflush threads.
|
||||||
|
|
||||||
|
At boot time, the kernel will create and maintain 'nr_pdflush_threads_min'
|
||||||
|
threads for the kernel's lifetime.
|
||||||
|
|
||||||
|
The default value is 2. The minimum value you can specify is 1, and
|
||||||
|
the maximum value is the current setting of 'nr_pdflush_threads_max'.
|
||||||
|
|
||||||
|
See 'nr_pdflush_threads_max' below for more information.
|
||||||
|
|
||||||
|
==============================================================
|
||||||
|
|
||||||
|
nr_pdflush_threads_max
|
||||||
|
|
||||||
|
This value controls the maximum number of pdflush threads that can be
|
||||||
|
created. The pdflush algorithm will create a new pdflush thread (up to
|
||||||
|
this maximum) if no pdflush threads have been available for >= 1 second.
|
||||||
|
|
||||||
|
The default value is 8. The minimum value you can specify is the
|
||||||
|
current value of 'nr_pdflush_threads_min' and the
|
||||||
|
maximum is 1000.
|
||||||
|
|
||||||
|
==============================================================
|
||||||
|
|
||||||
overcommit_memory:
|
overcommit_memory:
|
||||||
|
|
||||||
This value contains a flag that enables memory overcommitment.
|
This value contains a flag that enables memory overcommitment.
|
||||||
|
|
|
@ -115,6 +115,8 @@ On all - write a character to /proc/sysrq-trigger. e.g.:
|
||||||
|
|
||||||
'x' - Used by xmon interface on ppc/powerpc platforms.
|
'x' - Used by xmon interface on ppc/powerpc platforms.
|
||||||
|
|
||||||
|
'z' - Dump the ftrace buffer
|
||||||
|
|
||||||
'0'-'9' - Sets the console log level, controlling which kernel messages
|
'0'-'9' - Sets the console log level, controlling which kernel messages
|
||||||
will be printed to your console. ('0', for example would make
|
will be printed to your console. ('0', for example would make
|
||||||
it so that only emergency messages like PANICs or OOPSes would
|
it so that only emergency messages like PANICs or OOPSes would
|
||||||
|
|
|
@ -45,8 +45,8 @@ In include/trace/subsys.h :
|
||||||
#include <linux/tracepoint.h>
|
#include <linux/tracepoint.h>
|
||||||
|
|
||||||
DECLARE_TRACE(subsys_eventname,
|
DECLARE_TRACE(subsys_eventname,
|
||||||
TPPROTO(int firstarg, struct task_struct *p),
|
TP_PROTO(int firstarg, struct task_struct *p),
|
||||||
TPARGS(firstarg, p));
|
TP_ARGS(firstarg, p));
|
||||||
|
|
||||||
In subsys/file.c (where the tracing statement must be added) :
|
In subsys/file.c (where the tracing statement must be added) :
|
||||||
|
|
||||||
|
@ -66,10 +66,10 @@ Where :
|
||||||
- subsys is the name of your subsystem.
|
- subsys is the name of your subsystem.
|
||||||
- eventname is the name of the event to trace.
|
- eventname is the name of the event to trace.
|
||||||
|
|
||||||
- TPPROTO(int firstarg, struct task_struct *p) is the prototype of the
|
- TP_PROTO(int firstarg, struct task_struct *p) is the prototype of the
|
||||||
function called by this tracepoint.
|
function called by this tracepoint.
|
||||||
|
|
||||||
- TPARGS(firstarg, p) are the parameters names, same as found in the
|
- TP_ARGS(firstarg, p) are the parameters names, same as found in the
|
||||||
prototype.
|
prototype.
|
||||||
|
|
||||||
Connecting a function (probe) to a tracepoint is done by providing a
|
Connecting a function (probe) to a tracepoint is done by providing a
|
||||||
|
@ -103,13 +103,14 @@ used to export the defined tracepoints.
|
||||||
|
|
||||||
* Probe / tracepoint example
|
* Probe / tracepoint example
|
||||||
|
|
||||||
See the example provided in samples/tracepoints/src
|
See the example provided in samples/tracepoints
|
||||||
|
|
||||||
Compile them with your kernel.
|
Compile them with your kernel. They are built during 'make' (not
|
||||||
|
'make modules') when CONFIG_SAMPLE_TRACEPOINTS=m.
|
||||||
|
|
||||||
Run, as root :
|
Run, as root :
|
||||||
modprobe tracepoint-example (insmod order is not important)
|
modprobe tracepoint-sample (insmod order is not important)
|
||||||
modprobe tracepoint-probe-example
|
modprobe tracepoint-probe-sample
|
||||||
cat /proc/tracepoint-example (returns an expected error)
|
cat /proc/tracepoint-sample (returns an expected error)
|
||||||
rmmod tracepoint-example tracepoint-probe-example
|
rmmod tracepoint-sample tracepoint-probe-sample
|
||||||
dmesg
|
dmesg
|
||||||
|
|
125
Documentation/video4linux/pxa_camera.txt
Normal file
125
Documentation/video4linux/pxa_camera.txt
Normal file
|
@ -0,0 +1,125 @@
|
||||||
|
PXA-Camera Host Driver
|
||||||
|
======================
|
||||||
|
|
||||||
|
Constraints
|
||||||
|
-----------
|
||||||
|
a) Image size for YUV422P format
|
||||||
|
All YUV422P images are enforced to have width x height % 16 = 0.
|
||||||
|
This is due to DMA constraints, which transfers only planes of 8 byte
|
||||||
|
multiples.
|
||||||
|
|
||||||
|
|
||||||
|
Global video workflow
|
||||||
|
---------------------
|
||||||
|
a) QCI stopped
|
||||||
|
Initially, the QCI interface is stopped.
|
||||||
|
When a buffer is queued (pxa_videobuf_ops->buf_queue), the QCI starts.
|
||||||
|
|
||||||
|
b) QCI started
|
||||||
|
More buffers can be queued while the QCI is started without halting the
|
||||||
|
capture. The new buffers are "appended" at the tail of the DMA chain, and
|
||||||
|
smoothly captured one frame after the other.
|
||||||
|
|
||||||
|
Once a buffer is filled in the QCI interface, it is marked as "DONE" and
|
||||||
|
removed from the active buffers list. It can then be requeued or dequeued by
|
||||||
|
userland application.
|
||||||
|
|
||||||
|
Once the last buffer is filled in, the QCI interface stops.
|
||||||
|
|
||||||
|
|
||||||
|
DMA usage
|
||||||
|
---------
|
||||||
|
a) DMA flow
|
||||||
|
- first buffer queued for capture
|
||||||
|
Once a first buffer is queued for capture, the QCI is started, but data
|
||||||
|
transfer is not started. On "End Of Frame" interrupt, the irq handler
|
||||||
|
starts the DMA chain.
|
||||||
|
- capture of one videobuffer
|
||||||
|
The DMA chain starts transferring data into videobuffer RAM pages.
|
||||||
|
When all pages are transferred, the DMA irq is raised on "ENDINTR" status
|
||||||
|
- finishing one videobuffer
|
||||||
|
The DMA irq handler marks the videobuffer as "done", and removes it from
|
||||||
|
the active running queue
|
||||||
|
Meanwhile, the next videobuffer (if there is one), is transferred by DMA
|
||||||
|
- finishing the last videobuffer
|
||||||
|
On the DMA irq of the last videobuffer, the QCI is stopped.
|
||||||
|
|
||||||
|
b) DMA prepared buffer will have this structure
|
||||||
|
|
||||||
|
+------------+-----+---------------+-----------------+
|
||||||
|
| desc-sg[0] | ... | desc-sg[last] | finisher/linker |
|
||||||
|
+------------+-----+---------------+-----------------+
|
||||||
|
|
||||||
|
This structure is pointed to by dma->sg_cpu.
|
||||||
|
The descriptors are used as follows :
|
||||||
|
- desc-sg[i]: i-th descriptor, transferring the i-th sg
|
||||||
|
element to the video buffer scatter gather
|
||||||
|
- finisher: has ddadr=DDADR_STOP, dcmd=ENDIRQEN
|
||||||
|
- linker: has ddadr= desc-sg[0] of next video buffer, dcmd=0
|
||||||
|
|
||||||
|
For the next schema, let's assume d0=desc-sg[0] .. dN=desc-sg[N],
|
||||||
|
"f" stands for finisher and "l" for linker.
|
||||||
|
A typical running chain is :
|
||||||
|
|
||||||
|
Videobuffer 1 Videobuffer 2
|
||||||
|
+---------+----+---+ +----+----+----+---+
|
||||||
|
| d0 | .. | dN | l | | d0 | .. | dN | f |
|
||||||
|
+---------+----+-|-+ ^----+----+----+---+
|
||||||
|
| |
|
||||||
|
+----+
|
||||||
|
|
||||||
|
After the chaining is finished, the chain looks like :
|
||||||
|
|
||||||
|
Videobuffer 1 Videobuffer 2 Videobuffer 3
|
||||||
|
+---------+----+---+ +----+----+----+---+ +----+----+----+---+
|
||||||
|
| d0 | .. | dN | l | | d0 | .. | dN | l | | d0 | .. | dN | f |
|
||||||
|
+---------+----+-|-+ ^----+----+----+-|-+ ^----+----+----+---+
|
||||||
|
| | | |
|
||||||
|
+----+ +----+
|
||||||
|
new_link
|
||||||
|
|
||||||
|
c) DMA hot chaining timeslice issue
|
||||||
|
|
||||||
|
As DMA chaining is done while DMA _is_ running, the linking may be done
|
||||||
|
while the DMA jumps from one Videobuffer to another. On the schema, that
|
||||||
|
would be a problem if the following sequence is encountered :
|
||||||
|
|
||||||
|
- DMA chain is Videobuffer1 + Videobuffer2
|
||||||
|
- pxa_videobuf_queue() is called to queue Videobuffer3
|
||||||
|
- DMA controller finishes Videobuffer2, and DMA stops
|
||||||
|
=>
|
||||||
|
Videobuffer 1 Videobuffer 2
|
||||||
|
+---------+----+---+ +----+----+----+---+
|
||||||
|
| d0 | .. | dN | l | | d0 | .. | dN | f |
|
||||||
|
+---------+----+-|-+ ^----+----+----+-^-+
|
||||||
|
| | |
|
||||||
|
+----+ +-- DMA DDADR loads DDADR_STOP
|
||||||
|
|
||||||
|
- pxa_dma_add_tail_buf() is called, the Videobuffer2 "finisher" is
|
||||||
|
replaced by a "linker" to Videobuffer3 (creation of new_link)
|
||||||
|
- pxa_videobuf_queue() finishes
|
||||||
|
- the DMA irq handler is called, which terminates Videobuffer2
|
||||||
|
- Videobuffer3 capture is not scheduled on DMA chain (as it stopped !!!)
|
||||||
|
|
||||||
|
Videobuffer 1 Videobuffer 2 Videobuffer 3
|
||||||
|
+---------+----+---+ +----+----+----+---+ +----+----+----+---+
|
||||||
|
| d0 | .. | dN | l | | d0 | .. | dN | l | | d0 | .. | dN | f |
|
||||||
|
+---------+----+-|-+ ^----+----+----+-|-+ ^----+----+----+---+
|
||||||
|
| | | |
|
||||||
|
+----+ +----+
|
||||||
|
new_link
|
||||||
|
DMA DDADR still is DDADR_STOP
|
||||||
|
|
||||||
|
- pxa_camera_check_link_miss() is called
|
||||||
|
This checks if the DMA is finished and a buffer is still on the
|
||||||
|
pcdev->capture list. If that's the case, the capture will be restarted,
|
||||||
|
and Videobuffer3 is scheduled on DMA chain.
|
||||||
|
- the DMA irq handler finishes
|
||||||
|
|
||||||
|
Note: if DMA stops just after pxa_camera_check_link_miss() reads DDADR()
|
||||||
|
value, we have the guarantee that the DMA irq handler will be called back
|
||||||
|
when the DMA will finish the buffer, and pxa_camera_check_link_miss() will
|
||||||
|
be called again, to reschedule Videobuffer3.
|
||||||
|
|
||||||
|
--
|
||||||
|
Author: Robert Jarzmik <robert.jarzmik@free.fr>
|
|
@ -90,7 +90,7 @@ up before calling v4l2_device_register then it will be untouched. If dev is
|
||||||
NULL, then you *must* setup v4l2_dev->name before calling v4l2_device_register.
|
NULL, then you *must* setup v4l2_dev->name before calling v4l2_device_register.
|
||||||
|
|
||||||
The first 'dev' argument is normally the struct device pointer of a pci_dev,
|
The first 'dev' argument is normally the struct device pointer of a pci_dev,
|
||||||
usb_device or platform_device. It is rare for dev to be NULL, but it happens
|
usb_interface or platform_device. It is rare for dev to be NULL, but it happens
|
||||||
with ISA devices or when one device creates multiple PCI devices, thus making
|
with ISA devices or when one device creates multiple PCI devices, thus making
|
||||||
it impossible to associate v4l2_dev with a particular parent.
|
it impossible to associate v4l2_dev with a particular parent.
|
||||||
|
|
||||||
|
@ -351,17 +351,6 @@ And this to go from an i2c_client to a v4l2_subdev struct:
|
||||||
|
|
||||||
struct v4l2_subdev *sd = i2c_get_clientdata(client);
|
struct v4l2_subdev *sd = i2c_get_clientdata(client);
|
||||||
|
|
||||||
Finally you need to make a command function to make driver->command()
|
|
||||||
call the right subdev_ops functions:
|
|
||||||
|
|
||||||
static int subdev_command(struct i2c_client *client, unsigned cmd, void *arg)
|
|
||||||
{
|
|
||||||
return v4l2_subdev_command(i2c_get_clientdata(client), cmd, arg);
|
|
||||||
}
|
|
||||||
|
|
||||||
If driver->command is never used then you can leave this out. Eventually the
|
|
||||||
driver->command usage should be removed from v4l.
|
|
||||||
|
|
||||||
Make sure to call v4l2_device_unregister_subdev(sd) when the remove() callback
|
Make sure to call v4l2_device_unregister_subdev(sd) when the remove() callback
|
||||||
is called. This will unregister the sub-device from the bridge driver. It is
|
is called. This will unregister the sub-device from the bridge driver. It is
|
||||||
safe to call this even if the sub-device was never registered.
|
safe to call this even if the sub-device was never registered.
|
||||||
|
@ -375,14 +364,12 @@ from the remove() callback ensures that this is always done correctly.
|
||||||
|
|
||||||
The bridge driver also has some helper functions it can use:
|
The bridge driver also has some helper functions it can use:
|
||||||
|
|
||||||
struct v4l2_subdev *sd = v4l2_i2c_new_subdev(adapter, "module_foo", "chipid", 0x36);
|
struct v4l2_subdev *sd = v4l2_i2c_new_subdev(v4l2_dev, adapter,
|
||||||
|
"module_foo", "chipid", 0x36);
|
||||||
|
|
||||||
This loads the given module (can be NULL if no module needs to be loaded) and
|
This loads the given module (can be NULL if no module needs to be loaded) and
|
||||||
calls i2c_new_device() with the given i2c_adapter and chip/address arguments.
|
calls i2c_new_device() with the given i2c_adapter and chip/address arguments.
|
||||||
If all goes well, then it registers the subdev with the v4l2_device. It gets
|
If all goes well, then it registers the subdev with the v4l2_device.
|
||||||
the v4l2_device by calling i2c_get_adapdata(adapter), so you should make sure
|
|
||||||
to call i2c_set_adapdata(adapter, v4l2_device) when you setup the i2c_adapter
|
|
||||||
in your driver.
|
|
||||||
|
|
||||||
You can also use v4l2_i2c_new_probed_subdev() which is very similar to
|
You can also use v4l2_i2c_new_probed_subdev() which is very similar to
|
||||||
v4l2_i2c_new_subdev(), except that it has an array of possible I2C addresses
|
v4l2_i2c_new_subdev(), except that it has an array of possible I2C addresses
|
||||||
|
|
126
Documentation/vm/kmemtrace.txt
Normal file
126
Documentation/vm/kmemtrace.txt
Normal file
|
@ -0,0 +1,126 @@
|
||||||
|
kmemtrace - Kernel Memory Tracer
|
||||||
|
|
||||||
|
by Eduard - Gabriel Munteanu
|
||||||
|
<eduard.munteanu@linux360.ro>
|
||||||
|
|
||||||
|
I. Introduction
|
||||||
|
===============
|
||||||
|
|
||||||
|
kmemtrace helps kernel developers figure out two things:
|
||||||
|
1) how different allocators (SLAB, SLUB etc.) perform
|
||||||
|
2) how kernel code allocates memory and how much
|
||||||
|
|
||||||
|
To do this, we trace every allocation and export information to the userspace
|
||||||
|
through the relay interface. We export things such as the number of requested
|
||||||
|
bytes, the number of bytes actually allocated (i.e. including internal
|
||||||
|
fragmentation), whether this is a slab allocation or a plain kmalloc() and so
|
||||||
|
on.
|
||||||
|
|
||||||
|
The actual analysis is performed by a userspace tool (see section III for
|
||||||
|
details on where to get it from). It logs the data exported by the kernel,
|
||||||
|
processes it and (as of writing this) can provide the following information:
|
||||||
|
- the total amount of memory allocated and fragmentation per call-site
|
||||||
|
- the amount of memory allocated and fragmentation per allocation
|
||||||
|
- total memory allocated and fragmentation in the collected dataset
|
||||||
|
- number of cross-CPU allocation and frees (makes sense in NUMA environments)
|
||||||
|
|
||||||
|
Moreover, it can potentially find inconsistent and erroneous behavior in
|
||||||
|
kernel code, such as using slab free functions on kmalloc'ed memory or
|
||||||
|
allocating less memory than requested (but not truly failed allocations).
|
||||||
|
|
||||||
|
kmemtrace also makes provisions for tracing on some arch and analysing the
|
||||||
|
data on another.
|
||||||
|
|
||||||
|
II. Design and goals
|
||||||
|
====================
|
||||||
|
|
||||||
|
kmemtrace was designed to handle rather large amounts of data. Thus, it uses
|
||||||
|
the relay interface to export whatever is logged to userspace, which then
|
||||||
|
stores it. Analysis and reporting is done asynchronously, that is, after the
|
||||||
|
data is collected and stored. By design, it allows one to log and analyse
|
||||||
|
on different machines and different arches.
|
||||||
|
|
||||||
|
As of writing this, the ABI is not considered stable, though it might not
|
||||||
|
change much. However, no guarantees are made about compatibility yet. When
|
||||||
|
deemed stable, the ABI should still allow easy extension while maintaining
|
||||||
|
backward compatibility. This is described further in Documentation/ABI.
|
||||||
|
|
||||||
|
Summary of design goals:
|
||||||
|
- allow logging and analysis to be done across different machines
|
||||||
|
- be fast and anticipate usage in high-load environments (*)
|
||||||
|
- be reasonably extensible
|
||||||
|
- make it possible for GNU/Linux distributions to have kmemtrace
|
||||||
|
included in their repositories
|
||||||
|
|
||||||
|
(*) - one of the reasons Pekka Enberg's original userspace data analysis
|
||||||
|
tool's code was rewritten from Perl to C (although this is more than a
|
||||||
|
simple conversion)
|
||||||
|
|
||||||
|
|
||||||
|
III. Quick usage guide
|
||||||
|
======================
|
||||||
|
|
||||||
|
1) Get a kernel that supports kmemtrace and build it accordingly (i.e. enable
|
||||||
|
CONFIG_KMEMTRACE).
|
||||||
|
|
||||||
|
2) Get the userspace tool and build it:
|
||||||
|
$ git-clone git://repo.or.cz/kmemtrace-user.git # current repository
|
||||||
|
$ cd kmemtrace-user/
|
||||||
|
$ ./autogen.sh
|
||||||
|
$ ./configure
|
||||||
|
$ make
|
||||||
|
|
||||||
|
3) Boot the kmemtrace-enabled kernel if you haven't, preferably in the
|
||||||
|
'single' runlevel (so that relay buffers don't fill up easily), and run
|
||||||
|
kmemtrace:
|
||||||
|
# '$' does not mean user, but root here.
|
||||||
|
$ mount -t debugfs none /sys/kernel/debug
|
||||||
|
$ mount -t proc none /proc
|
||||||
|
$ cd path/to/kmemtrace-user/
|
||||||
|
$ ./kmemtraced
|
||||||
|
Wait a bit, then stop it with CTRL+C.
|
||||||
|
$ cat /sys/kernel/debug/kmemtrace/total_overruns # Check if we didn't
|
||||||
|
# overrun, should
|
||||||
|
# be zero.
|
||||||
|
$ (Optionally) [Run kmemtrace_check separately on each cpu[0-9]*.out file to
|
||||||
|
check its correctness]
|
||||||
|
$ ./kmemtrace-report
|
||||||
|
|
||||||
|
Now you should have a nice and short summary of how the allocator performs.
|
||||||
|
|
||||||
|
IV. FAQ and known issues
|
||||||
|
========================
|
||||||
|
|
||||||
|
Q: 'cat /sys/kernel/debug/kmemtrace/total_overruns' is non-zero, how do I fix
|
||||||
|
this? Should I worry?
|
||||||
|
A: If it's non-zero, this affects kmemtrace's accuracy, depending on how
|
||||||
|
large the number is. You can fix it by supplying a higher
|
||||||
|
'kmemtrace.subbufs=N' kernel parameter.
|
||||||
|
---
|
||||||
|
|
||||||
|
Q: kmemtrace_check reports errors, how do I fix this? Should I worry?
|
||||||
|
A: This is a bug and should be reported. It can occur for a variety of
|
||||||
|
reasons:
|
||||||
|
- possible bugs in relay code
|
||||||
|
- possible misuse of relay by kmemtrace
|
||||||
|
- timestamps being collected unorderly
|
||||||
|
Or you may fix it yourself and send us a patch.
|
||||||
|
---
|
||||||
|
|
||||||
|
Q: kmemtrace_report shows many errors, how do I fix this? Should I worry?
|
||||||
|
A: This is a known issue and I'm working on it. These might be true errors
|
||||||
|
in kernel code, which may have inconsistent behavior (e.g. allocating memory
|
||||||
|
with kmem_cache_alloc() and freeing it with kfree()). Pekka Enberg pointed
|
||||||
|
out this behavior may work with SLAB, but may fail with other allocators.
|
||||||
|
|
||||||
|
It may also be due to lack of tracing in some unusual allocator functions.
|
||||||
|
|
||||||
|
We don't want bug reports regarding this issue yet.
|
||||||
|
---
|
||||||
|
|
||||||
|
V. See also
|
||||||
|
===========
|
||||||
|
|
||||||
|
Documentation/kernel-parameters.txt
|
||||||
|
Documentation/ABI/testing/debugfs-kmemtrace
|
||||||
|
|
|
@ -8,7 +8,8 @@ The current memory policy support was added to Linux 2.6 around May 2004. This
|
||||||
document attempts to describe the concepts and APIs of the 2.6 memory policy
|
document attempts to describe the concepts and APIs of the 2.6 memory policy
|
||||||
support.
|
support.
|
||||||
|
|
||||||
Memory policies should not be confused with cpusets (Documentation/cpusets.txt)
|
Memory policies should not be confused with cpusets
|
||||||
|
(Documentation/cgroups/cpusets.txt)
|
||||||
which is an administrative mechanism for restricting the nodes from which
|
which is an administrative mechanism for restricting the nodes from which
|
||||||
memory may be allocated by a set of processes. Memory policies are a
|
memory may be allocated by a set of processes. Memory policies are a
|
||||||
programming interface that a NUMA-aware application can take advantage of. When
|
programming interface that a NUMA-aware application can take advantage of. When
|
||||||
|
|
|
@ -37,7 +37,8 @@ locations.
|
||||||
|
|
||||||
Larger installations usually partition the system using cpusets into
|
Larger installations usually partition the system using cpusets into
|
||||||
sections of nodes. Paul Jackson has equipped cpusets with the ability to
|
sections of nodes. Paul Jackson has equipped cpusets with the ability to
|
||||||
move pages when a task is moved to another cpuset (See ../cpusets.txt).
|
move pages when a task is moved to another cpuset (See
|
||||||
|
Documentation/cgroups/cpusets.txt).
|
||||||
Cpusets allows the automation of process locality. If a task is moved to
|
Cpusets allows the automation of process locality. If a task is moved to
|
||||||
a new cpuset then also all its pages are moved with it so that the
|
a new cpuset then also all its pages are moved with it so that the
|
||||||
performance of the process does not sink dramatically. Also the pages
|
performance of the process does not sink dramatically. Also the pages
|
||||||
|
|
|
@ -7,7 +7,8 @@ you can create fake NUMA nodes that represent contiguous chunks of memory and
|
||||||
assign them to cpusets and their attached tasks. This is a way of limiting the
|
assign them to cpusets and their attached tasks. This is a way of limiting the
|
||||||
amount of system memory that are available to a certain class of tasks.
|
amount of system memory that are available to a certain class of tasks.
|
||||||
|
|
||||||
For more information on the features of cpusets, see Documentation/cpusets.txt.
|
For more information on the features of cpusets, see
|
||||||
|
Documentation/cgroups/cpusets.txt.
|
||||||
There are a number of different configurations you can use for your needs. For
|
There are a number of different configurations you can use for your needs. For
|
||||||
more information on the numa=fake command line option and its various ways of
|
more information on the numa=fake command line option and its various ways of
|
||||||
configuring fake nodes, see Documentation/x86/x86_64/boot-options.txt.
|
configuring fake nodes, see Documentation/x86/x86_64/boot-options.txt.
|
||||||
|
@ -32,7 +33,7 @@ A machine may be split as follows with "numa=fake=4*512," as reported by dmesg:
|
||||||
On node 3 totalpages: 131072
|
On node 3 totalpages: 131072
|
||||||
|
|
||||||
Now following the instructions for mounting the cpusets filesystem from
|
Now following the instructions for mounting the cpusets filesystem from
|
||||||
Documentation/cpusets.txt, you can assign fake nodes (i.e. contiguous memory
|
Documentation/cgroups/cpusets.txt, you can assign fake nodes (i.e. contiguous memory
|
||||||
address spaces) to individual cpusets:
|
address spaces) to individual cpusets:
|
||||||
|
|
||||||
[root@xroads /]# mkdir exampleset
|
[root@xroads /]# mkdir exampleset
|
||||||
|
|
76
MAINTAINERS
76
MAINTAINERS
|
@ -1422,6 +1422,11 @@ P: Doug Warzecha
|
||||||
M: Douglas_Warzecha@dell.com
|
M: Douglas_Warzecha@dell.com
|
||||||
S: Maintained
|
S: Maintained
|
||||||
|
|
||||||
|
DELL WMI EXTRAS DRIVER
|
||||||
|
P: Matthew Garrett
|
||||||
|
M: mjg59@srcf.ucam.org
|
||||||
|
S: Maintained
|
||||||
|
|
||||||
DEVICE NUMBER REGISTRY
|
DEVICE NUMBER REGISTRY
|
||||||
P: Torben Mathiasen
|
P: Torben Mathiasen
|
||||||
M: device@lanana.org
|
M: device@lanana.org
|
||||||
|
@ -1539,7 +1544,6 @@ S: Maintained
|
||||||
DVB SUBSYSTEM AND DRIVERS
|
DVB SUBSYSTEM AND DRIVERS
|
||||||
P: LinuxTV.org Project
|
P: LinuxTV.org Project
|
||||||
M: linux-media@vger.kernel.org
|
M: linux-media@vger.kernel.org
|
||||||
L: linux-dvb@linuxtv.org (subscription required)
|
|
||||||
W: http://linuxtv.org/
|
W: http://linuxtv.org/
|
||||||
T: git kernel.org:/pub/scm/linux/kernel/git/mchehab/linux-2.6.git
|
T: git kernel.org:/pub/scm/linux/kernel/git/mchehab/linux-2.6.git
|
||||||
S: Maintained
|
S: Maintained
|
||||||
|
@ -1763,6 +1767,12 @@ M: viro@zeniv.linux.org.uk
|
||||||
L: linux-fsdevel@vger.kernel.org
|
L: linux-fsdevel@vger.kernel.org
|
||||||
S: Maintained
|
S: Maintained
|
||||||
|
|
||||||
|
FINTEK F75375S HARDWARE MONITOR AND FAN CONTROLLER DRIVER
|
||||||
|
P: Riku Voipio
|
||||||
|
M: riku.voipio@iki.fi
|
||||||
|
L: lm-sensors@lm-sensors.org
|
||||||
|
S: Maintained
|
||||||
|
|
||||||
FIREWIRE SUBSYSTEM (drivers/firewire, <linux/firewire*.h>)
|
FIREWIRE SUBSYSTEM (drivers/firewire, <linux/firewire*.h>)
|
||||||
P: Kristian Hoegsberg, Stefan Richter
|
P: Kristian Hoegsberg, Stefan Richter
|
||||||
M: krh@redhat.com, stefanr@s5r6.in-berlin.de
|
M: krh@redhat.com, stefanr@s5r6.in-berlin.de
|
||||||
|
@ -1945,6 +1955,12 @@ L: lm-sensors@lm-sensors.org
|
||||||
W: http://www.kernel.org/pub/linux/kernel/people/fseidel/hdaps/
|
W: http://www.kernel.org/pub/linux/kernel/people/fseidel/hdaps/
|
||||||
S: Maintained
|
S: Maintained
|
||||||
|
|
||||||
|
HYPERVISOR VIRTUAL CONSOLE DRIVER
|
||||||
|
L: linuxppc-dev@ozlabs.org
|
||||||
|
L: linux-kernel@vger.kernel.org
|
||||||
|
S: Odd Fixes
|
||||||
|
F: drivers/char/hvc_*
|
||||||
|
|
||||||
GSPCA FINEPIX SUBDRIVER
|
GSPCA FINEPIX SUBDRIVER
|
||||||
P: Frank Zago
|
P: Frank Zago
|
||||||
M: frank@zago.net
|
M: frank@zago.net
|
||||||
|
@ -2642,6 +2658,12 @@ M: jason.wessel@windriver.com
|
||||||
L: kgdb-bugreport@lists.sourceforge.net
|
L: kgdb-bugreport@lists.sourceforge.net
|
||||||
S: Maintained
|
S: Maintained
|
||||||
|
|
||||||
|
KMEMTRACE
|
||||||
|
P: Eduard - Gabriel Munteanu
|
||||||
|
M: eduard.munteanu@linux360.ro
|
||||||
|
L: linux-kernel@vger.kernel.org
|
||||||
|
S: Maintained
|
||||||
|
|
||||||
KPROBES
|
KPROBES
|
||||||
P: Ananth N Mavinakayanahalli
|
P: Ananth N Mavinakayanahalli
|
||||||
M: ananth@in.ibm.com
|
M: ananth@in.ibm.com
|
||||||
|
@ -2912,6 +2934,12 @@ M: buytenh@marvell.com
|
||||||
L: netdev@vger.kernel.org
|
L: netdev@vger.kernel.org
|
||||||
S: Supported
|
S: Supported
|
||||||
|
|
||||||
|
MARVELL SOC MMC/SD/SDIO CONTROLLER DRIVER
|
||||||
|
P: Nicolas Pitre
|
||||||
|
M: nico@cam.org
|
||||||
|
L: linux-kernel@vger.kernel.org
|
||||||
|
S: Maintained
|
||||||
|
|
||||||
MARVELL YUKON / SYSKONNECT DRIVER
|
MARVELL YUKON / SYSKONNECT DRIVER
|
||||||
P: Mirko Lindner
|
P: Mirko Lindner
|
||||||
M: mlindner@syskonnect.de
|
M: mlindner@syskonnect.de
|
||||||
|
@ -3022,8 +3050,9 @@ S: Maintained
|
||||||
|
|
||||||
MULTIFUNCTION DEVICES (MFD)
|
MULTIFUNCTION DEVICES (MFD)
|
||||||
P: Samuel Ortiz
|
P: Samuel Ortiz
|
||||||
M: sameo@openedhand.com
|
M: sameo@linux.intel.com
|
||||||
L: linux-kernel@vger.kernel.org
|
L: linux-kernel@vger.kernel.org
|
||||||
|
T: git kernel.org:/pub/scm/linux/kernel/git/sameo/mfd-2.6.git
|
||||||
S: Supported
|
S: Supported
|
||||||
|
|
||||||
MULTIMEDIA CARD (MMC), SECURE DIGITAL (SD) AND SDIO SUBSYSTEM
|
MULTIMEDIA CARD (MMC), SECURE DIGITAL (SD) AND SDIO SUBSYSTEM
|
||||||
|
@ -3098,7 +3127,7 @@ M: shemminger@linux-foundation.org
|
||||||
L: netem@lists.linux-foundation.org
|
L: netem@lists.linux-foundation.org
|
||||||
S: Maintained
|
S: Maintained
|
||||||
|
|
||||||
NETERION (S2IO) Xframe 10GbE DRIVER
|
NETERION (S2IO) 10GbE DRIVER (xframe/vxge)
|
||||||
P: Ramkrishna Vepa
|
P: Ramkrishna Vepa
|
||||||
M: ram.vepa@neterion.com
|
M: ram.vepa@neterion.com
|
||||||
P: Rastapur Santosh
|
P: Rastapur Santosh
|
||||||
|
@ -3107,8 +3136,11 @@ P: Sivakumar Subramani
|
||||||
M: sivakumar.subramani@neterion.com
|
M: sivakumar.subramani@neterion.com
|
||||||
P: Sreenivasa Honnur
|
P: Sreenivasa Honnur
|
||||||
M: sreenivasa.honnur@neterion.com
|
M: sreenivasa.honnur@neterion.com
|
||||||
|
P: Anil Murthy
|
||||||
|
M: anil.murthy@neterion.com
|
||||||
L: netdev@vger.kernel.org
|
L: netdev@vger.kernel.org
|
||||||
W: http://trac.neterion.com/cgi-bin/trac.cgi/wiki/TitleIndex?anonymous
|
W: http://trac.neterion.com/cgi-bin/trac.cgi/wiki/Linux?Anonymous
|
||||||
|
W: http://trac.neterion.com/cgi-bin/trac.cgi/wiki/X3100Linux?Anonymous
|
||||||
S: Supported
|
S: Supported
|
||||||
|
|
||||||
NETFILTER/IPTABLES/IPCHAINS
|
NETFILTER/IPTABLES/IPCHAINS
|
||||||
|
@ -3212,6 +3244,13 @@ M: andi@lisas.de
|
||||||
L: netdev@vger.kernel.org
|
L: netdev@vger.kernel.org
|
||||||
S: Maintained
|
S: Maintained
|
||||||
|
|
||||||
|
NILFS2 FILESYSTEM
|
||||||
|
P: KONISHI Ryusuke
|
||||||
|
M: konishi.ryusuke@lab.ntt.co.jp
|
||||||
|
L: users@nilfs.org
|
||||||
|
W: http://www.nilfs.org/en/
|
||||||
|
S: Supported
|
||||||
|
|
||||||
NINJA SCSI-3 / NINJA SCSI-32Bi (16bit/CardBus) PCMCIA SCSI HOST ADAPTER DRIVER
|
NINJA SCSI-3 / NINJA SCSI-32Bi (16bit/CardBus) PCMCIA SCSI HOST ADAPTER DRIVER
|
||||||
P: YOKOTA Hiroshi
|
P: YOKOTA Hiroshi
|
||||||
M: yokota@netlab.is.tsukuba.ac.jp
|
M: yokota@netlab.is.tsukuba.ac.jp
|
||||||
|
@ -3399,6 +3438,11 @@ P: Jim Cromie
|
||||||
M: jim.cromie@gmail.com
|
M: jim.cromie@gmail.com
|
||||||
S: Maintained
|
S: Maintained
|
||||||
|
|
||||||
|
PCA9532 LED DRIVER
|
||||||
|
P: Riku Voipio
|
||||||
|
M: riku.voipio@iki.fi
|
||||||
|
S: Maintained
|
||||||
|
|
||||||
PCI ERROR RECOVERY
|
PCI ERROR RECOVERY
|
||||||
P: Linas Vepstas
|
P: Linas Vepstas
|
||||||
M: linas@austin.ibm.com
|
M: linas@austin.ibm.com
|
||||||
|
@ -3896,7 +3940,14 @@ S: Maintained
|
||||||
SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) DRIVER
|
SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) DRIVER
|
||||||
P: Pierre Ossman
|
P: Pierre Ossman
|
||||||
M: drzeus-sdhci@drzeus.cx
|
M: drzeus-sdhci@drzeus.cx
|
||||||
L: sdhci-devel@list.drzeus.cx
|
L: sdhci-devel@lists.ossman.eu
|
||||||
|
S: Maintained
|
||||||
|
|
||||||
|
SECURE DIGITAL HOST CONTROLLER INTERFACE, OPEN FIRMWARE BINDINGS (SDHCI-OF)
|
||||||
|
P: Anton Vorontsov
|
||||||
|
M: avorontsov@ru.mvista.com
|
||||||
|
L: linuxppc-dev@ozlabs.org
|
||||||
|
L: sdhci-devel@lists.ossman.eu
|
||||||
S: Maintained
|
S: Maintained
|
||||||
|
|
||||||
SECURITY SUBSYSTEM
|
SECURITY SUBSYSTEM
|
||||||
|
@ -4362,10 +4413,7 @@ T: quilt http://svn.sourceforge.jp/svnroot/tomoyo/trunk/2.2.x/tomoyo-lsm/patches
|
||||||
S: Maintained
|
S: Maintained
|
||||||
|
|
||||||
TOSHIBA ACPI EXTRAS DRIVER
|
TOSHIBA ACPI EXTRAS DRIVER
|
||||||
P: John Belmonte
|
S: Orphan
|
||||||
M: toshiba_acpi@memebeam.org
|
|
||||||
W: http://memebeam.org/toys/ToshibaAcpiDriver
|
|
||||||
S: Maintained
|
|
||||||
|
|
||||||
TOSHIBA SMM DRIVER
|
TOSHIBA SMM DRIVER
|
||||||
P: Jonathan Buzzard
|
P: Jonathan Buzzard
|
||||||
|
@ -4374,6 +4422,11 @@ L: tlinux-users@tce.toshiba-dme.co.jp
|
||||||
W: http://www.buzzard.org.uk/toshiba/
|
W: http://www.buzzard.org.uk/toshiba/
|
||||||
S: Maintained
|
S: Maintained
|
||||||
|
|
||||||
|
TMIO MMC DRIVER
|
||||||
|
P: Ian Molton
|
||||||
|
M: ian@mnementh.co.uk
|
||||||
|
S: Maintained
|
||||||
|
|
||||||
TPM DEVICE DRIVER
|
TPM DEVICE DRIVER
|
||||||
P: Debora Velarde
|
P: Debora Velarde
|
||||||
M: debora@linux.vnet.ibm.com
|
M: debora@linux.vnet.ibm.com
|
||||||
|
@ -4838,7 +4891,7 @@ M: lrg@slimlogic.co.uk
|
||||||
P: Mark Brown
|
P: Mark Brown
|
||||||
M: broonie@opensource.wolfsonmicro.com
|
M: broonie@opensource.wolfsonmicro.com
|
||||||
W: http://opensource.wolfsonmicro.com/node/15
|
W: http://opensource.wolfsonmicro.com/node/15
|
||||||
W: http://www.slimlogic.co.uk/?page_id=5
|
W: http://www.slimlogic.co.uk/?p=48
|
||||||
T: git kernel.org/pub/scm/linux/kernel/git/lrg/voltage-2.6.git
|
T: git kernel.org/pub/scm/linux/kernel/git/lrg/voltage-2.6.git
|
||||||
S: Supported
|
S: Supported
|
||||||
|
|
||||||
|
@ -4960,7 +5013,8 @@ S: Supported
|
||||||
|
|
||||||
XFS FILESYSTEM
|
XFS FILESYSTEM
|
||||||
P: Silicon Graphics Inc
|
P: Silicon Graphics Inc
|
||||||
P: Bill O'Donnell
|
P: Felix Blyakher
|
||||||
|
M: felixb@sgi.com
|
||||||
M: xfs-masters@oss.sgi.com
|
M: xfs-masters@oss.sgi.com
|
||||||
L: xfs@oss.sgi.com
|
L: xfs@oss.sgi.com
|
||||||
W: http://oss.sgi.com/projects/xfs
|
W: http://oss.sgi.com/projects/xfs
|
||||||
|
|
4
Makefile
4
Makefile
|
@ -1,7 +1,7 @@
|
||||||
VERSION = 2
|
VERSION = 2
|
||||||
PATCHLEVEL = 6
|
PATCHLEVEL = 6
|
||||||
SUBLEVEL = 29
|
SUBLEVEL = 30
|
||||||
EXTRAVERSION =
|
EXTRAVERSION = -rc1
|
||||||
NAME = Temporary Tasmanian Devil
|
NAME = Temporary Tasmanian Devil
|
||||||
|
|
||||||
# *DOCUMENTATION*
|
# *DOCUMENTATION*
|
||||||
|
|
|
@ -6,6 +6,7 @@ config OPROFILE
|
||||||
tristate "OProfile system profiling (EXPERIMENTAL)"
|
tristate "OProfile system profiling (EXPERIMENTAL)"
|
||||||
depends on PROFILING
|
depends on PROFILING
|
||||||
depends on HAVE_OPROFILE
|
depends on HAVE_OPROFILE
|
||||||
|
depends on TRACING_SUPPORT
|
||||||
select TRACING
|
select TRACING
|
||||||
select RING_BUFFER
|
select RING_BUFFER
|
||||||
help
|
help
|
||||||
|
|
1
arch/alpha/include/asm/ftrace.h
Normal file
1
arch/alpha/include/asm/ftrace.h
Normal file
|
@ -0,0 +1 @@
|
||||||
|
/* empty */
|
|
@ -14,17 +14,4 @@ typedef struct {
|
||||||
|
|
||||||
void ack_bad_irq(unsigned int irq);
|
void ack_bad_irq(unsigned int irq);
|
||||||
|
|
||||||
#define HARDIRQ_BITS 12
|
|
||||||
|
|
||||||
/*
|
|
||||||
* The hardirq mask has to be large enough to have
|
|
||||||
* space for potentially nestable IRQ sources in the system
|
|
||||||
* to nest on a single CPU. On Alpha, interrupts are masked at the CPU
|
|
||||||
* by IPL as well as at the system level. We only have 8 IPLs (UNIX PALcode)
|
|
||||||
* so we really only have 8 nestable IRQs, but allow some overhead
|
|
||||||
*/
|
|
||||||
#if (1 << HARDIRQ_BITS) < 16
|
|
||||||
#error HARDIRQ_BITS is too low!
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* _ALPHA_HARDIRQ_H */
|
#endif /* _ALPHA_HARDIRQ_H */
|
||||||
|
|
|
@ -166,6 +166,9 @@ static inline void __raw_write_unlock(raw_rwlock_t * lock)
|
||||||
lock->lock = 0;
|
lock->lock = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock)
|
||||||
|
#define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock)
|
||||||
|
|
||||||
#define _raw_spin_relax(lock) cpu_relax()
|
#define _raw_spin_relax(lock) cpu_relax()
|
||||||
#define _raw_read_relax(lock) cpu_relax()
|
#define _raw_read_relax(lock) cpu_relax()
|
||||||
#define _raw_write_relax(lock) cpu_relax()
|
#define _raw_write_relax(lock) cpu_relax()
|
||||||
|
|
|
@ -272,7 +272,7 @@ alpha_vfork(struct pt_regs *regs)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
int
|
int
|
||||||
copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
|
copy_thread(unsigned long clone_flags, unsigned long usp,
|
||||||
unsigned long unused,
|
unsigned long unused,
|
||||||
struct task_struct * p, struct pt_regs * regs)
|
struct task_struct * p, struct pt_regs * regs)
|
||||||
{
|
{
|
||||||
|
|
|
@ -18,7 +18,10 @@
|
||||||
|
|
||||||
unsigned int __machine_arch_type;
|
unsigned int __machine_arch_type;
|
||||||
|
|
||||||
#include <linux/string.h>
|
#include <linux/compiler.h> /* for inline */
|
||||||
|
#include <linux/types.h> /* for size_t */
|
||||||
|
#include <linux/stddef.h> /* for NULL */
|
||||||
|
#include <asm/string.h>
|
||||||
|
|
||||||
#ifdef STANDALONE_DEBUG
|
#ifdef STANDALONE_DEBUG
|
||||||
#define putstr printf
|
#define putstr printf
|
||||||
|
|
|
@ -474,14 +474,34 @@ CONFIG_NETDEVICES=y
|
||||||
# CONFIG_EQUALIZER is not set
|
# CONFIG_EQUALIZER is not set
|
||||||
# CONFIG_TUN is not set
|
# CONFIG_TUN is not set
|
||||||
# CONFIG_VETH is not set
|
# CONFIG_VETH is not set
|
||||||
# CONFIG_PHYLIB is not set
|
CONFIG_PHYLIB=y
|
||||||
|
|
||||||
|
#
|
||||||
|
# MII PHY device drivers
|
||||||
|
#
|
||||||
|
# CONFIG_MARVELL_PHY is not set
|
||||||
|
# CONFIG_DAVICOM_PHY is not set
|
||||||
|
# CONFIG_QSEMI_PHY is not set
|
||||||
|
# CONFIG_LXT_PHY is not set
|
||||||
|
# CONFIG_CICADA_PHY is not set
|
||||||
|
# CONFIG_VITESSE_PHY is not set
|
||||||
|
CONFIG_SMSC_PHY=y
|
||||||
|
# CONFIG_BROADCOM_PHY is not set
|
||||||
|
# CONFIG_ICPLUS_PHY is not set
|
||||||
|
# CONFIG_REALTEK_PHY is not set
|
||||||
|
# CONFIG_NATIONAL_PHY is not set
|
||||||
|
# CONFIG_STE10XP is not set
|
||||||
|
# CONFIG_LSI_ET1011C_PHY is not set
|
||||||
|
# CONFIG_FIXED_PHY is not set
|
||||||
|
# CONFIG_MDIO_BITBANG is not set
|
||||||
CONFIG_NET_ETHERNET=y
|
CONFIG_NET_ETHERNET=y
|
||||||
CONFIG_MII=y
|
CONFIG_MII=y
|
||||||
# CONFIG_AX88796 is not set
|
# CONFIG_AX88796 is not set
|
||||||
# CONFIG_SMC91X is not set
|
# CONFIG_SMC91X is not set
|
||||||
# CONFIG_DM9000 is not set
|
# CONFIG_DM9000 is not set
|
||||||
# CONFIG_ENC28J60 is not set
|
# CONFIG_ENC28J60 is not set
|
||||||
CONFIG_SMC911X=y
|
# CONFIG_SMC911X is not set
|
||||||
|
CONFIG_SMSC911X=y
|
||||||
# CONFIG_IBM_NEW_EMAC_ZMII is not set
|
# CONFIG_IBM_NEW_EMAC_ZMII is not set
|
||||||
# CONFIG_IBM_NEW_EMAC_RGMII is not set
|
# CONFIG_IBM_NEW_EMAC_RGMII is not set
|
||||||
# CONFIG_IBM_NEW_EMAC_TAH is not set
|
# CONFIG_IBM_NEW_EMAC_TAH is not set
|
||||||
|
|
|
@ -465,12 +465,33 @@ CONFIG_NETDEVICES=y
|
||||||
# CONFIG_EQUALIZER is not set
|
# CONFIG_EQUALIZER is not set
|
||||||
# CONFIG_TUN is not set
|
# CONFIG_TUN is not set
|
||||||
# CONFIG_VETH is not set
|
# CONFIG_VETH is not set
|
||||||
# CONFIG_PHYLIB is not set
|
CONFIG_PHYLIB=y
|
||||||
|
|
||||||
|
#
|
||||||
|
# MII PHY device drivers
|
||||||
|
#
|
||||||
|
# CONFIG_MARVELL_PHY is not set
|
||||||
|
# CONFIG_DAVICOM_PHY is not set
|
||||||
|
# CONFIG_QSEMI_PHY is not set
|
||||||
|
# CONFIG_LXT_PHY is not set
|
||||||
|
# CONFIG_CICADA_PHY is not set
|
||||||
|
# CONFIG_VITESSE_PHY is not set
|
||||||
|
CONFIG_SMSC_PHY=y
|
||||||
|
# CONFIG_BROADCOM_PHY is not set
|
||||||
|
# CONFIG_ICPLUS_PHY is not set
|
||||||
|
# CONFIG_REALTEK_PHY is not set
|
||||||
|
# CONFIG_NATIONAL_PHY is not set
|
||||||
|
# CONFIG_STE10XP is not set
|
||||||
|
# CONFIG_LSI_ET1011C_PHY is not set
|
||||||
|
# CONFIG_FIXED_PHY is not set
|
||||||
|
# CONFIG_MDIO_BITBANG is not set
|
||||||
CONFIG_NET_ETHERNET=y
|
CONFIG_NET_ETHERNET=y
|
||||||
CONFIG_MII=y
|
CONFIG_MII=y
|
||||||
# CONFIG_AX88796 is not set
|
# CONFIG_AX88796 is not set
|
||||||
CONFIG_SMC91X=y
|
CONFIG_SMC91X=y
|
||||||
# CONFIG_DM9000 is not set
|
# CONFIG_DM9000 is not set
|
||||||
|
# CONFIG_SMC911X is not set
|
||||||
|
CONFIG_SMSC911X=y
|
||||||
# CONFIG_IBM_NEW_EMAC_ZMII is not set
|
# CONFIG_IBM_NEW_EMAC_ZMII is not set
|
||||||
# CONFIG_IBM_NEW_EMAC_RGMII is not set
|
# CONFIG_IBM_NEW_EMAC_RGMII is not set
|
||||||
# CONFIG_IBM_NEW_EMAC_TAH is not set
|
# CONFIG_IBM_NEW_EMAC_TAH is not set
|
||||||
|
|
|
@ -496,13 +496,33 @@ CONFIG_NETDEVICES=y
|
||||||
# CONFIG_EQUALIZER is not set
|
# CONFIG_EQUALIZER is not set
|
||||||
# CONFIG_TUN is not set
|
# CONFIG_TUN is not set
|
||||||
# CONFIG_VETH is not set
|
# CONFIG_VETH is not set
|
||||||
# CONFIG_PHYLIB is not set
|
CONFIG_PHYLIB=y
|
||||||
|
|
||||||
|
#
|
||||||
|
# MII PHY device drivers
|
||||||
|
#
|
||||||
|
# CONFIG_MARVELL_PHY is not set
|
||||||
|
# CONFIG_DAVICOM_PHY is not set
|
||||||
|
# CONFIG_QSEMI_PHY is not set
|
||||||
|
# CONFIG_LXT_PHY is not set
|
||||||
|
# CONFIG_CICADA_PHY is not set
|
||||||
|
# CONFIG_VITESSE_PHY is not set
|
||||||
|
CONFIG_SMSC_PHY=y
|
||||||
|
# CONFIG_BROADCOM_PHY is not set
|
||||||
|
# CONFIG_ICPLUS_PHY is not set
|
||||||
|
# CONFIG_REALTEK_PHY is not set
|
||||||
|
# CONFIG_NATIONAL_PHY is not set
|
||||||
|
# CONFIG_STE10XP is not set
|
||||||
|
# CONFIG_LSI_ET1011C_PHY is not set
|
||||||
|
# CONFIG_FIXED_PHY is not set
|
||||||
|
# CONFIG_MDIO_BITBANG is not set
|
||||||
CONFIG_NET_ETHERNET=y
|
CONFIG_NET_ETHERNET=y
|
||||||
CONFIG_MII=y
|
CONFIG_MII=y
|
||||||
# CONFIG_AX88796 is not set
|
# CONFIG_AX88796 is not set
|
||||||
CONFIG_SMC91X=y
|
CONFIG_SMC91X=y
|
||||||
# CONFIG_DM9000 is not set
|
# CONFIG_DM9000 is not set
|
||||||
CONFIG_SMC911X=y
|
# CONFIG_SMC911X is not set
|
||||||
|
CONFIG_SMSC911X=y
|
||||||
# CONFIG_IBM_NEW_EMAC_ZMII is not set
|
# CONFIG_IBM_NEW_EMAC_ZMII is not set
|
||||||
# CONFIG_IBM_NEW_EMAC_RGMII is not set
|
# CONFIG_IBM_NEW_EMAC_RGMII is not set
|
||||||
# CONFIG_IBM_NEW_EMAC_TAH is not set
|
# CONFIG_IBM_NEW_EMAC_TAH is not set
|
||||||
|
|
|
@ -490,13 +490,33 @@ CONFIG_NETDEVICES=y
|
||||||
# CONFIG_EQUALIZER is not set
|
# CONFIG_EQUALIZER is not set
|
||||||
# CONFIG_TUN is not set
|
# CONFIG_TUN is not set
|
||||||
# CONFIG_VETH is not set
|
# CONFIG_VETH is not set
|
||||||
# CONFIG_PHYLIB is not set
|
CONFIG_PHYLIB=y
|
||||||
|
|
||||||
|
#
|
||||||
|
# MII PHY device drivers
|
||||||
|
#
|
||||||
|
# CONFIG_MARVELL_PHY is not set
|
||||||
|
# CONFIG_DAVICOM_PHY is not set
|
||||||
|
# CONFIG_QSEMI_PHY is not set
|
||||||
|
# CONFIG_LXT_PHY is not set
|
||||||
|
# CONFIG_CICADA_PHY is not set
|
||||||
|
# CONFIG_VITESSE_PHY is not set
|
||||||
|
CONFIG_SMSC_PHY=y
|
||||||
|
# CONFIG_BROADCOM_PHY is not set
|
||||||
|
# CONFIG_ICPLUS_PHY is not set
|
||||||
|
# CONFIG_REALTEK_PHY is not set
|
||||||
|
# CONFIG_NATIONAL_PHY is not set
|
||||||
|
# CONFIG_STE10XP is not set
|
||||||
|
# CONFIG_LSI_ET1011C_PHY is not set
|
||||||
|
# CONFIG_FIXED_PHY is not set
|
||||||
|
# CONFIG_MDIO_BITBANG is not set
|
||||||
CONFIG_NET_ETHERNET=y
|
CONFIG_NET_ETHERNET=y
|
||||||
CONFIG_MII=y
|
CONFIG_MII=y
|
||||||
# CONFIG_AX88796 is not set
|
# CONFIG_AX88796 is not set
|
||||||
CONFIG_SMC91X=y
|
CONFIG_SMC91X=y
|
||||||
# CONFIG_DM9000 is not set
|
# CONFIG_DM9000 is not set
|
||||||
CONFIG_SMC911X=y
|
# CONFIG_SMC911X is not set
|
||||||
|
CONFIG_SMSC911X=y
|
||||||
# CONFIG_IBM_NEW_EMAC_ZMII is not set
|
# CONFIG_IBM_NEW_EMAC_ZMII is not set
|
||||||
# CONFIG_IBM_NEW_EMAC_RGMII is not set
|
# CONFIG_IBM_NEW_EMAC_RGMII is not set
|
||||||
# CONFIG_IBM_NEW_EMAC_TAH is not set
|
# CONFIG_IBM_NEW_EMAC_TAH is not set
|
||||||
|
|
|
@ -217,6 +217,9 @@ static inline int __raw_read_trylock(raw_rwlock_t *rw)
|
||||||
/* read_can_lock - would read_trylock() succeed? */
|
/* read_can_lock - would read_trylock() succeed? */
|
||||||
#define __raw_read_can_lock(x) ((x)->lock < 0x80000000)
|
#define __raw_read_can_lock(x) ((x)->lock < 0x80000000)
|
||||||
|
|
||||||
|
#define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock)
|
||||||
|
#define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock)
|
||||||
|
|
||||||
#define _raw_spin_relax(lock) cpu_relax()
|
#define _raw_spin_relax(lock) cpu_relax()
|
||||||
#define _raw_read_relax(lock) cpu_relax()
|
#define _raw_read_relax(lock) cpu_relax()
|
||||||
#define _raw_write_relax(lock) cpu_relax()
|
#define _raw_write_relax(lock) cpu_relax()
|
||||||
|
|
|
@ -301,7 +301,7 @@ void release_thread(struct task_struct *dead_task)
|
||||||
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
|
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
|
||||||
|
|
||||||
int
|
int
|
||||||
copy_thread(int nr, unsigned long clone_flags, unsigned long stack_start,
|
copy_thread(unsigned long clone_flags, unsigned long stack_start,
|
||||||
unsigned long stk_sz, struct task_struct *p, struct pt_regs *regs)
|
unsigned long stk_sz, struct task_struct *p, struct pt_regs *regs)
|
||||||
{
|
{
|
||||||
struct thread_info *thread = task_thread_info(p);
|
struct thread_info *thread = task_thread_info(p);
|
||||||
|
|
|
@ -198,17 +198,17 @@ static int at91_pm_verify_clocks(void)
|
||||||
/* USB must not be using PLLB */
|
/* USB must not be using PLLB */
|
||||||
if (cpu_is_at91rm9200()) {
|
if (cpu_is_at91rm9200()) {
|
||||||
if ((scsr & (AT91RM9200_PMC_UHP | AT91RM9200_PMC_UDP)) != 0) {
|
if ((scsr & (AT91RM9200_PMC_UHP | AT91RM9200_PMC_UDP)) != 0) {
|
||||||
pr_debug("AT91: PM - Suspend-to-RAM with USB still active\n");
|
pr_err("AT91: PM - Suspend-to-RAM with USB still active\n");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
} else if (cpu_is_at91sam9260() || cpu_is_at91sam9261() || cpu_is_at91sam9263() || cpu_is_at91sam9g20()) {
|
} else if (cpu_is_at91sam9260() || cpu_is_at91sam9261() || cpu_is_at91sam9263() || cpu_is_at91sam9g20()) {
|
||||||
if ((scsr & (AT91SAM926x_PMC_UHP | AT91SAM926x_PMC_UDP)) != 0) {
|
if ((scsr & (AT91SAM926x_PMC_UHP | AT91SAM926x_PMC_UDP)) != 0) {
|
||||||
pr_debug("AT91: PM - Suspend-to-RAM with USB still active\n");
|
pr_err("AT91: PM - Suspend-to-RAM with USB still active\n");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
} else if (cpu_is_at91cap9()) {
|
} else if (cpu_is_at91cap9()) {
|
||||||
if ((scsr & AT91CAP9_PMC_UHP) != 0) {
|
if ((scsr & AT91CAP9_PMC_UHP) != 0) {
|
||||||
pr_debug("AT91: PM - Suspend-to-RAM with USB still active\n");
|
pr_err("AT91: PM - Suspend-to-RAM with USB still active\n");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -223,7 +223,7 @@ static int at91_pm_verify_clocks(void)
|
||||||
|
|
||||||
css = at91_sys_read(AT91_PMC_PCKR(i)) & AT91_PMC_CSS;
|
css = at91_sys_read(AT91_PMC_PCKR(i)) & AT91_PMC_CSS;
|
||||||
if (css != AT91_PMC_CSS_SLOW) {
|
if (css != AT91_PMC_CSS_SLOW) {
|
||||||
pr_debug("AT91: PM - Suspend-to-RAM with PCK%d src %d\n", i, css);
|
pr_err("AT91: PM - Suspend-to-RAM with PCK%d src %d\n", i, css);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -118,7 +118,7 @@ static struct resource ide_resources[] = {
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
static u64 ide_dma_mask = DMA_32BIT_MASK;
|
static u64 ide_dma_mask = DMA_BIT_MASK(32);
|
||||||
|
|
||||||
static struct platform_device ide_dev = {
|
static struct platform_device ide_dev = {
|
||||||
.name = "palm_bk3710",
|
.name = "palm_bk3710",
|
||||||
|
@ -127,7 +127,7 @@ static struct platform_device ide_dev = {
|
||||||
.num_resources = ARRAY_SIZE(ide_resources),
|
.num_resources = ARRAY_SIZE(ide_resources),
|
||||||
.dev = {
|
.dev = {
|
||||||
.dma_mask = &ide_dma_mask,
|
.dma_mask = &ide_dma_mask,
|
||||||
.coherent_dma_mask = DMA_32BIT_MASK,
|
.coherent_dma_mask = DMA_BIT_MASK(32),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
80
arch/arm/mach-davinci/include/mach/nand.h
Normal file
80
arch/arm/mach-davinci/include/mach/nand.h
Normal file
|
@ -0,0 +1,80 @@
|
||||||
|
/*
|
||||||
|
* mach-davinci/nand.h
|
||||||
|
*
|
||||||
|
* Copyright © 2006 Texas Instruments.
|
||||||
|
*
|
||||||
|
* Ported to 2.6.23 Copyright © 2008 by
|
||||||
|
* Sander Huijsen <Shuijsen@optelecom-nkf.com>
|
||||||
|
* Troy Kisky <troy.kisky@boundarydevices.com>
|
||||||
|
* Dirk Behme <Dirk.Behme@gmail.com>
|
||||||
|
*
|
||||||
|
* --------------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __ARCH_ARM_DAVINCI_NAND_H
|
||||||
|
#define __ARCH_ARM_DAVINCI_NAND_H
|
||||||
|
|
||||||
|
#include <linux/mtd/nand.h>
|
||||||
|
|
||||||
|
#define NRCSR_OFFSET 0x00
|
||||||
|
#define AWCCR_OFFSET 0x04
|
||||||
|
#define A1CR_OFFSET 0x10
|
||||||
|
#define NANDFCR_OFFSET 0x60
|
||||||
|
#define NANDFSR_OFFSET 0x64
|
||||||
|
#define NANDF1ECC_OFFSET 0x70
|
||||||
|
|
||||||
|
/* 4-bit ECC syndrome registers */
|
||||||
|
#define NAND_4BIT_ECC_LOAD_OFFSET 0xbc
|
||||||
|
#define NAND_4BIT_ECC1_OFFSET 0xc0
|
||||||
|
#define NAND_4BIT_ECC2_OFFSET 0xc4
|
||||||
|
#define NAND_4BIT_ECC3_OFFSET 0xc8
|
||||||
|
#define NAND_4BIT_ECC4_OFFSET 0xcc
|
||||||
|
#define NAND_ERR_ADD1_OFFSET 0xd0
|
||||||
|
#define NAND_ERR_ADD2_OFFSET 0xd4
|
||||||
|
#define NAND_ERR_ERRVAL1_OFFSET 0xd8
|
||||||
|
#define NAND_ERR_ERRVAL2_OFFSET 0xdc
|
||||||
|
|
||||||
|
/* NOTE: boards don't need to use these address bits
|
||||||
|
* for ALE/CLE unless they support booting from NAND.
|
||||||
|
* They're used unless platform data overrides them.
|
||||||
|
*/
|
||||||
|
#define MASK_ALE 0x08
|
||||||
|
#define MASK_CLE 0x10
|
||||||
|
|
||||||
|
struct davinci_nand_pdata { /* platform_data */
|
||||||
|
uint32_t mask_ale;
|
||||||
|
uint32_t mask_cle;
|
||||||
|
|
||||||
|
/* for packages using two chipselects */
|
||||||
|
uint32_t mask_chipsel;
|
||||||
|
|
||||||
|
/* board's default static partition info */
|
||||||
|
struct mtd_partition *parts;
|
||||||
|
unsigned nr_parts;
|
||||||
|
|
||||||
|
/* none == NAND_ECC_NONE (strongly *not* advised!!)
|
||||||
|
* soft == NAND_ECC_SOFT
|
||||||
|
* 1-bit == NAND_ECC_HW
|
||||||
|
* 4-bit == NAND_ECC_HW_SYNDROME (not on all chips)
|
||||||
|
*/
|
||||||
|
nand_ecc_modes_t ecc_mode;
|
||||||
|
|
||||||
|
/* e.g. NAND_BUSWIDTH_16 or NAND_USE_FLASH_BBT */
|
||||||
|
unsigned options;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif /* __ARCH_ARM_DAVINCI_NAND_H */
|
|
@ -64,7 +64,7 @@ static struct resource usb_resources[] = {
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
static u64 usb_dmamask = DMA_32BIT_MASK;
|
static u64 usb_dmamask = DMA_BIT_MASK(32);
|
||||||
|
|
||||||
static struct platform_device usb_dev = {
|
static struct platform_device usb_dev = {
|
||||||
.name = "musb_hdrc",
|
.name = "musb_hdrc",
|
||||||
|
@ -72,7 +72,7 @@ static struct platform_device usb_dev = {
|
||||||
.dev = {
|
.dev = {
|
||||||
.platform_data = &usb_data,
|
.platform_data = &usb_data,
|
||||||
.dma_mask = &usb_dmamask,
|
.dma_mask = &usb_dmamask,
|
||||||
.coherent_dma_mask = DMA_32BIT_MASK,
|
.coherent_dma_mask = DMA_BIT_MASK(32),
|
||||||
},
|
},
|
||||||
.resource = usb_resources,
|
.resource = usb_resources,
|
||||||
.num_resources = ARRAY_SIZE(usb_resources),
|
.num_resources = ARRAY_SIZE(usb_resources),
|
||||||
|
|
|
@ -28,7 +28,7 @@ static inline void arch_idle(void)
|
||||||
cpu_do_idle();
|
cpu_do_idle();
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void arch_reset(char mode)
|
static inline void arch_reset(char mode, const char *cmd)
|
||||||
{
|
{
|
||||||
__raw_writel(RESET_GLOBAL | RESET_CPU1,
|
__raw_writel(RESET_GLOBAL | RESET_CPU1,
|
||||||
IO_ADDRESS(GEMINI_GLOBAL_BASE) + GLOBAL_RESET);
|
IO_ADDRESS(GEMINI_GLOBAL_BASE) + GLOBAL_RESET);
|
||||||
|
|
|
@ -307,7 +307,7 @@ static struct resource iop13xx_adma_2_resources[] = {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
static u64 iop13xx_adma_dmamask = DMA_64BIT_MASK;
|
static u64 iop13xx_adma_dmamask = DMA_BIT_MASK(64);
|
||||||
static struct iop_adma_platform_data iop13xx_adma_0_data = {
|
static struct iop_adma_platform_data iop13xx_adma_0_data = {
|
||||||
.hw_id = 0,
|
.hw_id = 0,
|
||||||
.pool_size = PAGE_SIZE,
|
.pool_size = PAGE_SIZE,
|
||||||
|
@ -331,7 +331,7 @@ static struct platform_device iop13xx_adma_0_channel = {
|
||||||
.resource = iop13xx_adma_0_resources,
|
.resource = iop13xx_adma_0_resources,
|
||||||
.dev = {
|
.dev = {
|
||||||
.dma_mask = &iop13xx_adma_dmamask,
|
.dma_mask = &iop13xx_adma_dmamask,
|
||||||
.coherent_dma_mask = DMA_64BIT_MASK,
|
.coherent_dma_mask = DMA_BIT_MASK(64),
|
||||||
.platform_data = (void *) &iop13xx_adma_0_data,
|
.platform_data = (void *) &iop13xx_adma_0_data,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
@ -343,7 +343,7 @@ static struct platform_device iop13xx_adma_1_channel = {
|
||||||
.resource = iop13xx_adma_1_resources,
|
.resource = iop13xx_adma_1_resources,
|
||||||
.dev = {
|
.dev = {
|
||||||
.dma_mask = &iop13xx_adma_dmamask,
|
.dma_mask = &iop13xx_adma_dmamask,
|
||||||
.coherent_dma_mask = DMA_64BIT_MASK,
|
.coherent_dma_mask = DMA_BIT_MASK(64),
|
||||||
.platform_data = (void *) &iop13xx_adma_1_data,
|
.platform_data = (void *) &iop13xx_adma_1_data,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
@ -355,7 +355,7 @@ static struct platform_device iop13xx_adma_2_channel = {
|
||||||
.resource = iop13xx_adma_2_resources,
|
.resource = iop13xx_adma_2_resources,
|
||||||
.dev = {
|
.dev = {
|
||||||
.dma_mask = &iop13xx_adma_dmamask,
|
.dma_mask = &iop13xx_adma_dmamask,
|
||||||
.coherent_dma_mask = DMA_64BIT_MASK,
|
.coherent_dma_mask = DMA_BIT_MASK(64),
|
||||||
.platform_data = (void *) &iop13xx_adma_2_data,
|
.platform_data = (void *) &iop13xx_adma_2_data,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
|
@ -151,7 +151,7 @@ static struct resource iop13xx_tpmi_3_resources[] = {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
u64 iop13xx_tpmi_mask = DMA_64BIT_MASK;
|
u64 iop13xx_tpmi_mask = DMA_BIT_MASK(64);
|
||||||
static struct platform_device iop13xx_tpmi_0_device = {
|
static struct platform_device iop13xx_tpmi_0_device = {
|
||||||
.name = "iop-tpmi",
|
.name = "iop-tpmi",
|
||||||
.id = 0,
|
.id = 0,
|
||||||
|
@ -159,7 +159,7 @@ static struct platform_device iop13xx_tpmi_0_device = {
|
||||||
.resource = iop13xx_tpmi_0_resources,
|
.resource = iop13xx_tpmi_0_resources,
|
||||||
.dev = {
|
.dev = {
|
||||||
.dma_mask = &iop13xx_tpmi_mask,
|
.dma_mask = &iop13xx_tpmi_mask,
|
||||||
.coherent_dma_mask = DMA_64BIT_MASK,
|
.coherent_dma_mask = DMA_BIT_MASK(64),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -170,7 +170,7 @@ static struct platform_device iop13xx_tpmi_1_device = {
|
||||||
.resource = iop13xx_tpmi_1_resources,
|
.resource = iop13xx_tpmi_1_resources,
|
||||||
.dev = {
|
.dev = {
|
||||||
.dma_mask = &iop13xx_tpmi_mask,
|
.dma_mask = &iop13xx_tpmi_mask,
|
||||||
.coherent_dma_mask = DMA_64BIT_MASK,
|
.coherent_dma_mask = DMA_BIT_MASK(64),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -181,7 +181,7 @@ static struct platform_device iop13xx_tpmi_2_device = {
|
||||||
.resource = iop13xx_tpmi_2_resources,
|
.resource = iop13xx_tpmi_2_resources,
|
||||||
.dev = {
|
.dev = {
|
||||||
.dma_mask = &iop13xx_tpmi_mask,
|
.dma_mask = &iop13xx_tpmi_mask,
|
||||||
.coherent_dma_mask = DMA_64BIT_MASK,
|
.coherent_dma_mask = DMA_BIT_MASK(64),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -192,7 +192,7 @@ static struct platform_device iop13xx_tpmi_3_device = {
|
||||||
.resource = iop13xx_tpmi_3_resources,
|
.resource = iop13xx_tpmi_3_resources,
|
||||||
.dev = {
|
.dev = {
|
||||||
.dma_mask = &iop13xx_tpmi_mask,
|
.dma_mask = &iop13xx_tpmi_mask,
|
||||||
.coherent_dma_mask = DMA_64BIT_MASK,
|
.coherent_dma_mask = DMA_BIT_MASK(64),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -508,7 +508,7 @@ static struct mv_xor_platform_shared_data kirkwood_xor_shared_data = {
|
||||||
.dram = &kirkwood_mbus_dram_info,
|
.dram = &kirkwood_mbus_dram_info,
|
||||||
};
|
};
|
||||||
|
|
||||||
static u64 kirkwood_xor_dmamask = DMA_32BIT_MASK;
|
static u64 kirkwood_xor_dmamask = DMA_BIT_MASK(32);
|
||||||
|
|
||||||
|
|
||||||
/*****************************************************************************
|
/*****************************************************************************
|
||||||
|
@ -559,7 +559,7 @@ static struct platform_device kirkwood_xor00_channel = {
|
||||||
.resource = kirkwood_xor00_resources,
|
.resource = kirkwood_xor00_resources,
|
||||||
.dev = {
|
.dev = {
|
||||||
.dma_mask = &kirkwood_xor_dmamask,
|
.dma_mask = &kirkwood_xor_dmamask,
|
||||||
.coherent_dma_mask = DMA_64BIT_MASK,
|
.coherent_dma_mask = DMA_BIT_MASK(64),
|
||||||
.platform_data = (void *)&kirkwood_xor00_data,
|
.platform_data = (void *)&kirkwood_xor00_data,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
@ -585,7 +585,7 @@ static struct platform_device kirkwood_xor01_channel = {
|
||||||
.resource = kirkwood_xor01_resources,
|
.resource = kirkwood_xor01_resources,
|
||||||
.dev = {
|
.dev = {
|
||||||
.dma_mask = &kirkwood_xor_dmamask,
|
.dma_mask = &kirkwood_xor_dmamask,
|
||||||
.coherent_dma_mask = DMA_64BIT_MASK,
|
.coherent_dma_mask = DMA_BIT_MASK(64),
|
||||||
.platform_data = (void *)&kirkwood_xor01_data,
|
.platform_data = (void *)&kirkwood_xor01_data,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
@ -657,7 +657,7 @@ static struct platform_device kirkwood_xor10_channel = {
|
||||||
.resource = kirkwood_xor10_resources,
|
.resource = kirkwood_xor10_resources,
|
||||||
.dev = {
|
.dev = {
|
||||||
.dma_mask = &kirkwood_xor_dmamask,
|
.dma_mask = &kirkwood_xor_dmamask,
|
||||||
.coherent_dma_mask = DMA_64BIT_MASK,
|
.coherent_dma_mask = DMA_BIT_MASK(64),
|
||||||
.platform_data = (void *)&kirkwood_xor10_data,
|
.platform_data = (void *)&kirkwood_xor10_data,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
@ -683,7 +683,7 @@ static struct platform_device kirkwood_xor11_channel = {
|
||||||
.resource = kirkwood_xor11_resources,
|
.resource = kirkwood_xor11_resources,
|
||||||
.dev = {
|
.dev = {
|
||||||
.dma_mask = &kirkwood_xor_dmamask,
|
.dma_mask = &kirkwood_xor_dmamask,
|
||||||
.coherent_dma_mask = DMA_64BIT_MASK,
|
.coherent_dma_mask = DMA_BIT_MASK(64),
|
||||||
.platform_data = (void *)&kirkwood_xor11_data,
|
.platform_data = (void *)&kirkwood_xor11_data,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
|
@ -14,7 +14,7 @@ static inline void arch_idle(void)
|
||||||
cpu_do_idle();
|
cpu_do_idle();
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void arch_reset(char mode)
|
static inline void arch_reset(char mode, const char *cmd)
|
||||||
{
|
{
|
||||||
cpu_reset(0);
|
cpu_reset(0);
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,6 +6,9 @@
|
||||||
|
|
||||||
obj-y += generic.o clock.o devices.o
|
obj-y += generic.o clock.o devices.o
|
||||||
|
|
||||||
|
# Support for CMOS sensor interface
|
||||||
|
obj-$(CONFIG_MX1_VIDEO) += ksym_mx1.o mx1_camera_fiq.o
|
||||||
|
|
||||||
# Specific board support
|
# Specific board support
|
||||||
obj-$(CONFIG_ARCH_MX1ADS) += mx1ads.o
|
obj-$(CONFIG_ARCH_MX1ADS) += mx1ads.o
|
||||||
obj-$(CONFIG_MACH_SCB9328) += scb9328.o
|
obj-$(CONFIG_MACH_SCB9328) += scb9328.o
|
|
@ -44,7 +44,7 @@ static struct resource imx_csi_resources[] = {
|
||||||
static u64 imx_csi_dmamask = 0xffffffffUL;
|
static u64 imx_csi_dmamask = 0xffffffffUL;
|
||||||
|
|
||||||
struct platform_device imx_csi_device = {
|
struct platform_device imx_csi_device = {
|
||||||
.name = "imx-csi",
|
.name = "mx1-camera",
|
||||||
.id = 0, /* This is used to put cameras on this interface */
|
.id = 0, /* This is used to put cameras on this interface */
|
||||||
.dev = {
|
.dev = {
|
||||||
.dma_mask = &imx_csi_dmamask,
|
.dma_mask = &imx_csi_dmamask,
|
||||||
|
|
18
arch/arm/mach-mx1/ksym_mx1.c
Normal file
18
arch/arm/mach-mx1/ksym_mx1.c
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
/*
|
||||||
|
* Exported ksyms of ARCH_MX1
|
||||||
|
*
|
||||||
|
* Copyright (C) 2008, Darius Augulis <augulis.darius@gmail.com>
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License version 2 as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/platform_device.h>
|
||||||
|
#include <linux/module.h>
|
||||||
|
|
||||||
|
#include <mach/mx1_camera.h>
|
||||||
|
|
||||||
|
/* IMX camera FIQ handler */
|
||||||
|
EXPORT_SYMBOL(mx1_camera_sof_fiq_start);
|
||||||
|
EXPORT_SYMBOL(mx1_camera_sof_fiq_end);
|
35
arch/arm/mach-mx1/mx1_camera_fiq.S
Normal file
35
arch/arm/mach-mx1/mx1_camera_fiq.S
Normal file
|
@ -0,0 +1,35 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2008 Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
|
||||||
|
*
|
||||||
|
* Based on linux/arch/arm/lib/floppydma.S
|
||||||
|
* Copyright (C) 1995, 1996 Russell King
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License version 2 as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*/
|
||||||
|
#include <linux/linkage.h>
|
||||||
|
#include <asm/assembler.h>
|
||||||
|
|
||||||
|
.text
|
||||||
|
.global mx1_camera_sof_fiq_end
|
||||||
|
.global mx1_camera_sof_fiq_start
|
||||||
|
mx1_camera_sof_fiq_start:
|
||||||
|
@ enable dma
|
||||||
|
ldr r12, [r9]
|
||||||
|
orr r12, r12, #0x00000001
|
||||||
|
str r12, [r9]
|
||||||
|
@ unmask DMA interrupt
|
||||||
|
ldr r12, [r8]
|
||||||
|
bic r12, r12, r13
|
||||||
|
str r12, [r8]
|
||||||
|
@ disable SOF interrupt
|
||||||
|
ldr r12, [r10]
|
||||||
|
bic r12, r12, #0x00010000
|
||||||
|
str r12, [r10]
|
||||||
|
@ clear SOF flag
|
||||||
|
mov r12, #0x00010000
|
||||||
|
str r12, [r11]
|
||||||
|
@ return from FIQ
|
||||||
|
subs pc, lr, #4
|
||||||
|
mx1_camera_sof_fiq_end:
|
|
@ -533,7 +533,7 @@ static struct clk_lookup lookups[] __initdata = {
|
||||||
_REGISTER_CLOCK(NULL, "kpp", kpp_clk)
|
_REGISTER_CLOCK(NULL, "kpp", kpp_clk)
|
||||||
_REGISTER_CLOCK("fsl-usb2-udc", "usb", usb_clk1)
|
_REGISTER_CLOCK("fsl-usb2-udc", "usb", usb_clk1)
|
||||||
_REGISTER_CLOCK("fsl-usb2-udc", "usb_ahb", usb_clk2)
|
_REGISTER_CLOCK("fsl-usb2-udc", "usb_ahb", usb_clk2)
|
||||||
_REGISTER_CLOCK("mx3-camera.0", "csi", csi_clk)
|
_REGISTER_CLOCK("mx3-camera.0", NULL, csi_clk)
|
||||||
_REGISTER_CLOCK("imx-uart.0", NULL, uart1_clk)
|
_REGISTER_CLOCK("imx-uart.0", NULL, uart1_clk)
|
||||||
_REGISTER_CLOCK("imx-uart.1", NULL, uart2_clk)
|
_REGISTER_CLOCK("imx-uart.1", NULL, uart2_clk)
|
||||||
_REGISTER_CLOCK("imx-uart.2", NULL, uart3_clk)
|
_REGISTER_CLOCK("imx-uart.2", NULL, uart3_clk)
|
||||||
|
|
|
@ -24,7 +24,7 @@
|
||||||
#include <linux/mtd/plat-ram.h>
|
#include <linux/mtd/plat-ram.h>
|
||||||
#include <linux/memory.h>
|
#include <linux/memory.h>
|
||||||
#include <linux/gpio.h>
|
#include <linux/gpio.h>
|
||||||
#include <linux/smc911x.h>
|
#include <linux/smsc911x.h>
|
||||||
#include <linux/interrupt.h>
|
#include <linux/interrupt.h>
|
||||||
#include <linux/i2c.h>
|
#include <linux/i2c.h>
|
||||||
#include <linux/i2c/at24.h>
|
#include <linux/i2c/at24.h>
|
||||||
|
@ -70,7 +70,7 @@ static struct imxuart_platform_data uart_pdata = {
|
||||||
.flags = IMXUART_HAVE_RTSCTS,
|
.flags = IMXUART_HAVE_RTSCTS,
|
||||||
};
|
};
|
||||||
|
|
||||||
static struct resource smc911x_resources[] = {
|
static struct resource smsc911x_resources[] = {
|
||||||
[0] = {
|
[0] = {
|
||||||
.start = CS1_BASE_ADDR + 0x300,
|
.start = CS1_BASE_ADDR + 0x300,
|
||||||
.end = CS1_BASE_ADDR + 0x300 + SZ_64K - 1,
|
.end = CS1_BASE_ADDR + 0x300 + SZ_64K - 1,
|
||||||
|
@ -79,22 +79,25 @@ static struct resource smc911x_resources[] = {
|
||||||
[1] = {
|
[1] = {
|
||||||
.start = IOMUX_TO_IRQ(MX31_PIN_GPIO3_1),
|
.start = IOMUX_TO_IRQ(MX31_PIN_GPIO3_1),
|
||||||
.end = IOMUX_TO_IRQ(MX31_PIN_GPIO3_1),
|
.end = IOMUX_TO_IRQ(MX31_PIN_GPIO3_1),
|
||||||
.flags = IORESOURCE_IRQ,
|
.flags = IORESOURCE_IRQ | IORESOURCE_IRQ_LOWLEVEL,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
static struct smc911x_platdata smc911x_info = {
|
static struct smsc911x_platform_config smsc911x_info = {
|
||||||
.flags = SMC911X_USE_32BIT,
|
.flags = SMSC911X_USE_32BIT | SMSC911X_FORCE_INTERNAL_PHY |
|
||||||
.irq_flags = IRQF_SHARED | IRQF_TRIGGER_LOW,
|
SMSC911X_SAVE_MAC_ADDRESS,
|
||||||
|
.irq_polarity = SMSC911X_IRQ_POLARITY_ACTIVE_LOW,
|
||||||
|
.irq_type = SMSC911X_IRQ_TYPE_OPEN_DRAIN,
|
||||||
|
.phy_interface = PHY_INTERFACE_MODE_MII,
|
||||||
};
|
};
|
||||||
|
|
||||||
static struct platform_device pcm037_eth = {
|
static struct platform_device pcm037_eth = {
|
||||||
.name = "smc911x",
|
.name = "smsc911x",
|
||||||
.id = -1,
|
.id = -1,
|
||||||
.num_resources = ARRAY_SIZE(smc911x_resources),
|
.num_resources = ARRAY_SIZE(smsc911x_resources),
|
||||||
.resource = smc911x_resources,
|
.resource = smsc911x_resources,
|
||||||
.dev = {
|
.dev = {
|
||||||
.platform_data = &smc911x_info,
|
.platform_data = &smsc911x_info,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -80,7 +80,7 @@
|
||||||
#define NETX_PA_XPEC(no) (NETX_IO_PHYS + NETX_OFS_XPEC(no))
|
#define NETX_PA_XPEC(no) (NETX_IO_PHYS + NETX_OFS_XPEC(no))
|
||||||
#define NETX_PA_VIC (NETX_IO_PHYS + NETX_OFS_VIC)
|
#define NETX_PA_VIC (NETX_IO_PHYS + NETX_OFS_VIC)
|
||||||
|
|
||||||
/* virual addresses */
|
/* virtual addresses */
|
||||||
#define NETX_VA_SYSTEM (NETX_IO_VIRT + NETX_OFS_SYSTEM)
|
#define NETX_VA_SYSTEM (NETX_IO_VIRT + NETX_OFS_SYSTEM)
|
||||||
#define NETX_VA_MEMCR (NETX_IO_VIRT + NETX_OFS_MEMCR)
|
#define NETX_VA_MEMCR (NETX_IO_VIRT + NETX_OFS_MEMCR)
|
||||||
#define NETX_VA_DPMAS (NETX_IO_VIRT + NETX_OFS_DPMAS)
|
#define NETX_VA_DPMAS (NETX_IO_VIRT + NETX_OFS_DPMAS)
|
||||||
|
|
|
@ -109,7 +109,7 @@ config MACH_OMAP_PALMZ71
|
||||||
help
|
help
|
||||||
Support for the Palm Zire71 PDA. To boot the kernel,
|
Support for the Palm Zire71 PDA. To boot the kernel,
|
||||||
you'll need a PalmOS compatible bootloader; check out
|
you'll need a PalmOS compatible bootloader; check out
|
||||||
http://hackndev.com/palm/z71 for more informations.
|
http://hackndev.com/palm/z71 for more information.
|
||||||
Say Y here if you have such a PDA, say N otherwise.
|
Say Y here if you have such a PDA, say N otherwise.
|
||||||
|
|
||||||
config MACH_OMAP_PALMTT
|
config MACH_OMAP_PALMTT
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue