-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathcore.py
920 lines (753 loc) · 35.6 KB
/
core.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
# proc: Simple interface to Linux process information.
#
# Author: Peter Odding <[email protected]>
# Last Change: April 26, 2020
# URL: https://proc.readthedocs.io
"""
The :mod:`proc.core` module contains the core functionality of the `proc` package.
This module provides a simple interface to the process information available in
``/proc``. It takes care of the text parsing that's necessary to gather process
information from ``/proc`` but it doesn't do much more than that. The functions
in this module produce :class:`Process` objects.
If you're just getting started with this module I suggest you jump to the
documentation of :func:`find_processes()` because this function provides the
"top level entry point" into most of the functionality provided by this
module.
"""
# Standard library modules.
import collections
import errno
import grp
import logging
import os
import pwd
import time
# External dependencies.
from executor import which
from proc.unix import UnixProcess
from property_manager import lazy_property
from humanfriendly.deprecation import define_aliases
# Public identifiers that require documentation.
# Every name listed here is defined (or imported into scope) further down in
# this module; the tuple also controls what ``from proc.core import *`` exports.
__all__ = (
    'NUM_RACE_CONDITIONS',
    'OwnerIDs',
    'Process',
    'ProtectedAccess',
    'find_processes',
    'find_system_uptime',
    'gid_to_name',
    'logger',
    'parse_process_cmdline',
    'parse_process_status',
    'sorted_by_pid',
    'uid_to_name',
)
# Module level logger (named after this module).
logger = logging.getLogger(__name__)

NUM_RACE_CONDITIONS = {
    'cmdline': 0,
    'environ': 0,
    'exe': 0,
    'stat': 0,
    'status': 0,
}
"""
Global counters of detected race conditions (a dictionary that maps the name
of a ``/proc/[pid]/...`` entry to the number of times that reading it failed
because the entry had disappeared).

The counters are incremented by :class:`ProtectedAccess`. They are only useful
for the test suite, which intentionally creates race conditions to verify that
they are properly handled.
"""
class Process(UnixProcess):

    """
    Process information based on ``/proc/[pid]/stat`` and similar files.

    :class:`Process` objects are constructed using
    :func:`find_processes()` and :func:`Process.from_path()`. You
    shouldn't be using the :class:`Process` constructor directly unless you
    know what you're doing.

    The :class:`Process` class extends :class:`~proc.unix.UnixProcess` which means
    all of the process manipulation supported by :class:`~proc.unix.UnixProcess`
    is also supported by :class:`Process` objects.

    **Comparison to official /proc documentation**

    Quite a few of the instance properties of this class are based on (and
    named after) fields extracted from ``/proc/[pid]/stat``. The following
    table lists these properties and the *zero based index* of the
    corresponding field in ``/proc/[pid]/stat``:

    ====================  =====
    Property              Index
    ====================  =====
    :attr:`pid`               0
    :attr:`comm`              1
    :attr:`state`             2
    :attr:`ppid`              3
    :attr:`pgrp`              4
    :attr:`session`           5
    :attr:`starttime`        21
    :attr:`vsize`            22
    :attr:`rss`              23
    ====================  =====

    As you can see from the indexes in the table above quite a few fields from
    ``/proc/[pid]/stat`` are not currently exposed by :class:`Process`
    objects. In fact ``/proc/[pid]/stat`` contains 44 fields! Some of these
    fields are no longer maintained by the Linux kernel and remain only for
    backwards compatibility (so exposing them is not useful) while other fields
    are not exposed because I didn't consider them relevant to a Python API. If
    your use case requires fields that are not yet exposed, feel free to
    suggest additional fields to expose in the issue tracker.

    The documentation on the properties of this class quotes from and
    paraphrases the text in `man 5 proc`_ so if things are unclear and you're
    feeling up to it, dive into the huge manual page for clarifications :-).

    .. _man 5 proc: http://linux.die.net/man/5/proc
    """

    @classmethod
    def from_path(cls, directory):
        """
        Construct a process information object from a numerical subdirectory of ``/proc``.

        :param directory: The absolute pathname of the numerical subdirectory
                          of ``/proc`` to get process information from (a
                          string).
        :returns: A process information object or ``None`` (in case the process
                  ends before its information can be read).

        This class method is used by :func:`find_processes()` to construct
        :class:`Process` objects. It's exposed as a separate method because
        it may sometimes be useful to call directly. For example:

        >>> from proc.core import Process
        >>> Process.from_path('/proc/self')
        Process(pid=1468,
                comm='python',
                state='R',
                ppid=21982,
                pgrp=1468,
                session=21982,
                vsize=40431616,
                rss=8212480,
                cmdline=['python'],
                exe='/home/peter/.virtualenvs/proc/bin/python')
        """
        fields = parse_process_status(directory)
        if fields:
            return cls(directory, fields)
        # The stat file could not be read (or was empty).
        return None

    @classmethod
    def from_pid(cls, pid):
        """
        Construct a process information object based on a process ID.

        :param pid: The process ID (an integer).
        :returns: A process information object or ``None`` (in case the process
                  ends before its information can be read).
        """
        return cls.from_path(os.path.join('/proc', str(pid)))

    def __init__(self, proc_tree, stat_fields):
        """
        Initialize a :class:`Process` object.

        :param proc_tree: The absolute pathname of the numerical subdirectory
                          of ``/proc`` on which the process information is
                          based (a string).
        :param stat_fields: The tokenized fields from ``/proc/[pid]/stat`` (a
                            list of strings).
        """
        # Initialize the superclass.
        super(Process, self).__init__()
        # Initialize instance variables.
        self.proc_tree = proc_tree
        self.stat_fields = stat_fields
        # Define aliases for two previously renamed methods.
        self.cont = self.resume
        self.stop = self.suspend

    def __repr__(self):
        """
        Create a human readable representation of a process information object.

        :returns: A string containing what looks like a :class:`Process`
                  constructor, but showing public properties instead of
                  internal properties.

        Properties marked as optional below are left out of the result when
        their value is false (zero, an empty string or an empty list).
        """
        fields = []
        for name, optional in (('pid', False),
                               ('comm', False),
                               ('state', False),
                               ('ppid', True),
                               ('pgrp', False),
                               ('session', False),
                               ('starttime', False),
                               ('vsize', False),
                               ('rss', False),
                               ('cmdline', True),
                               ('exe', True),
                               ('exe_path', True),
                               ('exe_name', False)):
            value = getattr(self, name)
            if not (optional and not value):
                fields.append("%s=%r" % (name, value))
        return "%s(%s)" % (self.__class__.__name__, ", ".join(fields))

    @lazy_property
    def cmdline(self):
        """
        The complete command line for the process (a list of strings).

        **Availability:**

        - This property is parsed from the contents of ``/proc/[pid]/cmdline``
          the first time it is referenced, after that its value is cached so it
          will always be available (although by then it may no longer be up to
          date because processes can change their command line at runtime on
          Linux).

        - If this property is first referenced after the process turns into a
          zombie_ or the process ends then it's too late to read the contents
          of ``/proc/[pid]/cmdline`` and an empty list is returned.

        .. note:: In Linux it is possible for a process to change its command
                  line after it has started. Modern daemons tend to do this in
                  order to communicate their status. Here's an example of how
                  the Nginx web server uses this feature:

                  >>> from proc.core import find_processes
                  >>> from pprint import pprint
                  >>> pprint([(p.pid, p.cmdline) for p in find_processes() if p.comm == 'nginx'])
                  [(2662, ['nginx: master process /usr/sbin/nginx']),
                   (25100, ['nginx: worker process']),
                   (25101, ['nginx: worker process']),
                   (25102, ['nginx: worker process']),
                   (25103, ['nginx: worker process'])]

                  What this means is that (depending on the behavior of the
                  process in question) it may be impossible to determine the
                  effective command line that was used to start a process. If
                  you're just interested in the pathname of the executable
                  consider using the :attr:`exe` property instead:

                  >>> from proc.core import find_processes
                  >>> from pprint import pprint
                  >>> pprint([(p.pid, p.exe) for p in find_processes() if p.comm == 'nginx'])
                  [(2662, '/usr/sbin/nginx'),
                   (25100, '/usr/sbin/nginx'),
                   (25101, '/usr/sbin/nginx'),
                   (25102, '/usr/sbin/nginx'),
                   (25103, '/usr/sbin/nginx')]
        """
        return parse_process_cmdline(self.proc_tree)

    @lazy_property
    def comm(self):
        """
        The filename of the executable.

        **Availability:** This property is parsed from the contents of
        ``/proc/[pid]/stat`` and is always available.

        The filename is not enclosed in parentheses like it is in
        ``/proc/[pid]/stat`` because the parentheses are an implementation
        detail of the encoding of ``/proc/[pid]/stat`` and the whole point of
        :mod:`proc.core` is to hide ugly encoding details like this :-).

        .. note:: This field can be truncated by the Linux kernel so strictly
                  speaking you can't rely on this field unless you know that
                  the executables you're interested in have short names. Here's
                  an example of what I'm talking about:

                  >>> from proc.core import find_processes
                  >>> next(p for p in find_processes() if p.comm.startswith('console'))
                  Process(pid=2753,
                          comm='console-kit-dae',
                          state='S',
                          ppid=1,
                          pgrp=1632,
                          session=1632,
                          vsize=2144198656,
                          rss=733184,
                          cmdline=['/usr/sbin/console-kit-daemon', '--no-daemon'])

                  As you can see in the example above the executable name
                  ``console-kit-daemon`` is truncated to ``console-kit-dae``.
                  If you need a reliable way to find the executable name
                  consider using the :attr:`cmdline` and/or :attr:`exe`
                  properties.
        """
        return self.stat_fields[1]

    @property
    def command_line(self):
        """
        An alias for the :attr:`cmdline` property.

        This alias exists so that :class:`~executor.process.ControllableProcess`
        can log process ids and command lines (this helps to make the log
        output more human friendly).
        """
        return self.cmdline

    @property
    def cwd(self):
        """
        The working directory of the process (a string).

        **Availability:**

        - This property is constructed by dereferencing the symbolic link
          ``/proc/[pid]/cwd`` each time the property is referenced (because the
          working directory may change at any time).

        - If this property is referenced after the process has ended then it's
          too late to dereference the symbolic link and an empty string is
          returned.

        - If an exception is encountered while dereferencing the symbolic link
          (for example because you don't have permission to dereference the
          symbolic link) the exception is swallowed and an empty string is
          returned.
        """
        with ProtectedAccess('cwd', "dereference working directory"):
            return os.readlink(os.path.join(self.proc_tree, 'cwd'))
        # Only reached when ProtectedAccess swallowed an exception.
        return ''

    @lazy_property
    def environ(self):
        """
        The environment of the process (a dictionary with string key/value pairs).

        **Availability:**

        - This property is parsed from the contents of ``/proc/[pid]/environ``
          the first time it is referenced, after that its value is cached so it
          will always be available.

        - If this property is first referenced after the process turns into a
          zombie_ or the process ends then it's too late to read the contents
          of ``/proc/[pid]/environ`` and an empty dictionary is returned.
        """
        variables = {}
        with ProtectedAccess('environ', "read process environment"):
            with open(os.path.join(self.proc_tree, 'environ')) as handle:
                contents = handle.read()
            if contents:
                # Entries are separated by null bytes and look like NAME=VALUE.
                for token in contents.split('\0'):
                    name, _, value = token.partition('=')
                    if name:
                        variables[name] = value
        return variables

    @lazy_property
    def exe(self):
        """
        The actual pathname of the executed command (a string).

        **Availability:**

        - This property is constructed by dereferencing the symbolic link
          ``/proc/[pid]/exe`` the first time the property is referenced, after
          that its value is cached so it will always be available.

        - If this property is referenced after the process has ended then it's
          too late to dereference the symbolic link and an empty string is
          returned.

        - If an exception is encountered while dereferencing the symbolic link
          (for example because you don't have permission to dereference the
          symbolic link) the exception is swallowed and an empty string is
          returned.
        """
        with ProtectedAccess('exe', "dereference executable path"):
            return os.readlink(os.path.join(self.proc_tree, 'exe'))
        # Only reached when ProtectedAccess swallowed an exception.
        return ''

    @lazy_property
    def exe_name(self):
        """
        The base name of the executable (a string).

        It can be tricky to reliably determine the name of the executable of an
        arbitrary process and this property tries to make it easier. Its value
        is based on the first of the following methods that works:

        1. If :attr:`exe_path` is available then the base name of this
           pathname is returned.

           - Pro: When the :attr:`exe_path` property is available it is
             fairly reliable.
           - Con: The :attr:`exe_path` property can be unavailable (refer to
             its documentation for details).

        2. If the first string in :attr:`cmdline` contains a name that is
           available on the executable search path (``$PATH``) then this name
           is returned.

           - Pro: As long as :attr:`cmdline` contains the name of an
             executable available on the ``$PATH`` this method works.
           - Con: This method can fail because a process has changed its own
             command line (after it was started).

        3. If both of the methods above fail :attr:`comm` is returned.

           - Pro: The :attr:`comm` field is always available.
           - Con: The :attr:`comm` field may be truncated.
        """
        if self.exe_path:
            return os.path.basename(self.exe_path)
        if self.cmdline:
            name = self.cmdline[0]
            # Only accept bare names (no directory components) found on $PATH.
            if os.path.basename(name) == name and which(name):
                return name
        return self.comm

    @lazy_property
    def exe_path(self):
        """
        The absolute pathname of the executable (a string).

        It can be tricky to reliably determine the pathname of the executable
        of an arbitrary process and this property tries to make it easier. Its
        value is based on the first of the following methods that works:

        1. If :attr:`exe` is available then this pathname is returned.

           - Pro: This method provides the most reliable way to determine the
             absolute pathname of the executed command because (as far as I
             know) it always provides an absolute pathname.
           - Con: This method can fail because you don't have permission to
             dereference the ``/proc/[pid]/exe`` symbolic link.

        2. If the first string in :attr:`cmdline` contains the absolute
           pathname of an executable file then this pathname is returned.

           - Pro: This method doesn't require the same permissions that method
             one requires.
           - Con: This method can fail because a process has changed its own
             command line (after it was started) or because the first string in
             the command line isn't an absolute pathname.

        3. If both of the methods above fail an empty string is returned.
        """
        if self.exe:
            return self.exe
        if self.cmdline:
            name = self.cmdline[0]
            if os.path.isabs(name) and os.access(name, os.X_OK):
                return name
        return ''

    @lazy_property
    def group(self):
        """
        The name of the real group ID (a string).

        **Availability:** Refer to :attr:`group_ids`. :data:`None` is returned
        if :attr:`group_ids` is unavailable or :func:`gid_to_name()` fails.
        """
        return gid_to_name(self.group_ids.real) if self.group_ids else None

    @lazy_property
    def group_ids(self):
        """
        The real, effective, saved set, and filesystem GIDs of the process (an :class:`OwnerIDs` object).

        **Availability:** Refer to :attr:`status_fields`. :data:`None` is
        returned if :attr:`status_fields` is unavailable.
        """
        return self._parse_ids('Gid')

    @property
    def is_alive(self):
        """
        :data:`True` if the process is still alive, :data:`False` otherwise.

        This property reads the ``/proc/[pid]/stat`` file each time the
        property is referenced to make sure that the process still exists and
        has not turned into a zombie_ process.

        See also :func:`~proc.unix.UnixProcess.suspend()`,
        :func:`~proc.unix.UnixProcess.resume()`,
        :meth:`~executor.process.ControllableProcess.terminate()` and
        :meth:`~executor.process.ControllableProcess.kill()`.
        """
        stat_fields = parse_process_status(self.proc_tree, silent=True)
        # Index 2 is the process state, 'Z' means zombie.
        return bool(stat_fields and stat_fields[2] != 'Z')

    @property
    def is_running(self):
        """
        An alias for :attr:`is_alive`.

        This alias makes :class:`~proc.unix.UnixProcess` objects aware of
        zombie_ processes so that e.g. killing of a zombie process doesn't hang
        indefinitely (waiting for a zombie that will never die).
        """
        return self.is_alive

    @lazy_property
    def pgrp(self):
        """
        The process group ID of the process (an integer).

        **Availability:** This property is parsed from the contents of
        ``/proc/[pid]/stat`` and is always available.
        """
        return int(self.stat_fields[4])

    @lazy_property
    def pid(self):
        """
        The process ID (an integer).

        **Availability:** This property is parsed from the contents of
        ``/proc/[pid]/stat`` and is always available.
        """
        return int(self.stat_fields[0])

    @lazy_property
    def ppid(self):
        """
        The process ID of the parent process (an integer).

        **Availability:** This property is parsed from the contents of
        ``/proc/[pid]/stat`` and is always available.

        This field is zero when the process doesn't have a parent process (same
        as in ``/proc/[pid]/stat``). Because Python treats the integer 0 as
        :data:`False` this field can be used as follows to find processes
        without a parent process:

        >>> from proc.core import find_processes
        >>> pprint([p for p in find_processes() if not p.ppid])
        [Process(pid=1, comm='init', state='S', pgrp=1, session=1, vsize=25174016, rss=1667072, cmdline=['/sbin/init']),
         Process(pid=2, comm='kthreadd', state='S', pgrp=0, session=0, vsize=0, rss=0)]
        """
        return int(self.stat_fields[3])

    @lazy_property
    def rss(self):
        """
        The resident set size of the process *in bytes* (an integer).

        Quoting from `man 5 proc`_:

         *Number of pages the process has in real memory. This is just the
         pages which count toward text, data, or stack space. This does not
         include pages which have not been demand-loaded in, or which are
         swapped out.*

        This property translates *pages* to *bytes* by multiplying the value
        extracted from ``/proc/[pid]/stat`` with the result of:

        .. code-block:: python

           os.sysconf('SC_PAGESIZE')

        **Availability:** This property is parsed from the contents of
        ``/proc/[pid]/stat`` and is always available.
        """
        return int(self.stat_fields[23]) * os.sysconf('SC_PAGESIZE')

    @property
    def runtime(self):
        """
        The time in seconds since the process started (a float).

        This property is calculated based on :attr:`starttime` every time
        it's requested (so it will always be up to date). Negative values
        are clamped to zero.

        .. warning:: The runtime will not stop growing when the process ends
                     because doing so would require a background thread just to
                     monitor when the process ends... This is an unfortunate
                     side effect of the architecture of ``/proc`` -- processes
                     disappear from ``/proc`` the moment they end so the
                     information about when the process ended is lost!
        """
        # Clamp with a float so the documented return type always holds.
        return max(0.0, time.time() - self.starttime)

    @lazy_property
    def session(self):
        """
        The session ID of the process (an integer).

        **Availability:** This property is parsed from the contents of
        ``/proc/[pid]/stat`` and is always available.
        """
        return int(self.stat_fields[5])

    @lazy_property
    def starttime(self):
        """
        The time at which the process was started (a float).

        Paraphrasing from `man 5 proc`_:

         *The time the process started after system boot. In kernels before
         Linux 2.6, this value was expressed in jiffies. Since Linux 2.6, the
         value is expressed in clock ticks.*

        This property translates *clock ticks* to *seconds* by dividing the
        value extracted from ``/proc/[pid]/stat`` with the result of:

        .. code-block:: python

           os.sysconf('SC_CLK_TCK')

        After the conversion to seconds the system's uptime is used to
        determine the absolute start time of the process (the number of seconds
        since the Unix epoch_).

        See also the :attr:`runtime` property.

        **Availability:** This property is calculated from the contents of
        ``/proc/[pid]/stat`` and ``/proc/uptime`` and is always available.

        .. _epoch: http://en.wikipedia.org/wiki/Unix_time
        """
        system_boot = time.time() - find_system_uptime()
        ticks_after_boot = int(self.stat_fields[21])
        seconds_after_boot = ticks_after_boot / float(os.sysconf('SC_CLK_TCK'))
        return system_boot + seconds_after_boot

    @lazy_property
    def state(self):
        """
        A single uppercase character describing the state of the process (a string).

        Quoting from `man 5 proc`_:

         *One character from the string "RSDZTW" where R is running, S is
         sleeping in an interruptible wait, D is waiting in uninterruptible
         disk sleep, Z is zombie_, T is traced or stopped (on a signal), and W
         is paging.*

        **Availability:** This property is parsed from the contents of
        ``/proc/[pid]/stat`` and is always available.

        .. _zombie: http://en.wikipedia.org/wiki/Zombie_process
        """
        return self.stat_fields[2]

    @lazy_property
    def status_fields(self):
        """
        Detailed process information (a dictionary with string key/value pairs).

        The dictionaries constructed by this property are based on the contents
        of ``/proc/[pid]/status``, which `man 5 proc`_ describes as follows:

         *Provides much of the information in /proc/[pid]/stat and
         /proc/[pid]/statm in a format that's easier for humans to parse.*

        While it's true that there is quite a lot of overlap between
        ``/proc/[pid]/stat`` and ``/proc/[pid]/status``, the latter also
        exposes important information that isn't available elsewhere (e.g.
        :attr:`user_ids` and :attr:`group_ids`).

        **Availability:**

        - This property is parsed from the contents of ``/proc/[pid]/status``
          the first time it is referenced, after that its value is cached so it
          will always be available.

        - If this property is first referenced after the process turns into a
          zombie_ or the process ends then it's too late to read the contents
          of ``/proc/[pid]/status`` and an empty dictionary is returned.
        """
        fields = {}
        with ProtectedAccess('status', "read detailed process status"):
            with open(os.path.join(self.proc_tree, 'status')) as handle:
                for line in handle:
                    # Each line looks like "Name:<whitespace>value".
                    name, _, value = line.partition(':')
                    fields[name] = value.strip()
        return fields

    @lazy_property
    def user(self):
        """
        The username of the real user ID (a string).

        **Availability:** Refer to :attr:`user_ids`. :data:`None` is returned
        if :attr:`user_ids` is unavailable or :func:`uid_to_name()` fails.
        """
        return uid_to_name(self.user_ids.real) if self.user_ids else None

    @lazy_property
    def user_ids(self):
        """
        The real, effective, saved set, and filesystem UIDs of the process (an :class:`OwnerIDs` object).

        **Availability:** Refer to :attr:`status_fields`. :data:`None` is
        returned if :attr:`status_fields` is unavailable.
        """
        return self._parse_ids('Uid')

    @lazy_property
    def vsize(self):
        """
        The virtual memory size of the process in bytes (an integer).

        **Availability:** This property is parsed from the contents of
        ``/proc/[pid]/stat`` and is always available.
        """
        return int(self.stat_fields[22])

    def _parse_ids(self, field_name):
        """
        Helper for :attr:`user_ids` and :attr:`group_ids`.

        :param field_name: The key to look up in :attr:`status_fields` (a
                           string like ``Uid`` or ``Gid``).
        :returns: An :class:`OwnerIDs` object built from the first four
                  whitespace separated integers in the field, or :data:`None`
                  when the field is missing, contains fewer than four values
                  or contains a value that is not an integer.
        """
        raw_value = self.status_fields.get(field_name, '')
        try:
            parsed_values = [int(n) for n in raw_value.split()]
        except ValueError:
            # A malformed field is treated the same as an unavailable one.
            return None
        if len(parsed_values) >= 4:
            return OwnerIDs(*parsed_values[:4])
        return None
class OwnerIDs(collections.namedtuple('OwnerIDs', ['real', 'effective', 'saved', 'fs'])):

    """
    The four user or group IDs listed on one line of ``/proc/[pid]/status``.

    :class:`OwnerIDs` objects are named tuples with four fields, in this
    order: `real`, `effective`, `saved` and `fs`. :class:`Process` fills them
    with integer numbers.
    """
def find_processes(obj_type=Process):
    """
    Walk the numerical subdirectories of ``/proc`` and yield process objects.

    :param obj_type: The type of process objects to construct (expected to be
                     :class:`Process` or a subclass of :class:`Process`).
    :returns: A generator of :class:`Process` objects.
    :raises: :exc:`~exceptions.TypeError` when `obj_type` doesn't inherit
             from :class:`Process`.

    Because this is a generator function nothing happens (not even the type
    check) until the caller starts iterating. Directory entries for which
    ``obj_type.from_path()`` returns a false value are silently skipped.
    """
    if not issubclass(obj_type, Process):
        raise TypeError("Custom process types should inherit from proc.core.Process!")
    proc_root = '/proc'
    found = 0
    logger.debug("Scanning for process information in %r ..", proc_root)
    for name in os.listdir(proc_root):
        # Only the numerical entries in /proc describe processes.
        if not name.isdigit():
            continue
        candidate = obj_type.from_path(os.path.join(proc_root, name))
        if not candidate:
            continue
        found += 1
        yield candidate
    logger.debug("Finished scanning %r, found %i processes.", proc_root, found)
def find_system_uptime():
    """
    Determine how long the system has been running.

    :returns: The uptime in seconds (a float).

    The value is the first whitespace separated number in ``/proc/uptime``.
    """
    with open('/proc/uptime') as handle:
        tokens = handle.read().split()
    first_number = tokens[0]
    return float(first_number)
def sorted_by_pid(processes):
    """
    Order the given processes by ascending process ID.

    :param processes: An iterable of :class:`Process` objects (any objects
                      with a ``pid`` attribute will do).
    :returns: A new list with the same objects sorted by their ``pid``.
    """
    ordered = list(processes)
    ordered.sort(key=lambda process: process.pid)
    return ordered
def parse_process_status(directory, silent=False):
    """
    Read a ``/proc/[pid]/stat`` file and split it into fields.

    :param directory: The absolute pathname of the numerical subdirectory of
                      ``/proc`` to get process information from (a string).
    :param silent: Accepted for the benefit of callers but not used by this
                   function.
    :returns: A list of strings containing the tokenized fields or ``None`` if
              the ``/proc/[pid]/stat`` file can't be read or turns out to be
              empty (failures are handled and logged by
              :class:`ProtectedAccess`).
    """
    with ProtectedAccess('stat', "read process status"):
        with open(os.path.join(directory, 'stat')) as handle:
            raw = handle.read()
        # When a process ends after its stat file was opened but before the
        # contents were read it's not certain that a nonempty read is
        # guaranteed, so an empty read is treated as "no information".
        if not raw:
            return None
        # The second field (`comm' in `man 5 proc') is the executable name
        # wrapped in parentheses. It may contain spaces as well as unbalanced
        # parentheses and no further encoding is applied, so an executable
        # called '(python)' shows up as '((python))'. Because `comm' is the
        # only field that can contain arbitrary text, the text between the
        # left most and right most parenthesis is always the right answer.
        head, _, tail = raw.partition('(')
        comm, _, rest = tail.rpartition(')')
        # Apart from `comm' every field is an integer or a single alphabetic
        # character (the state) which makes str.split() safe to use here.
        return head.split() + [comm] + rest.split()
    # Only reached when ProtectedAccess swallowed an exception.
    return None
def parse_process_cmdline(directory):
    """
    Read a ``/proc/[pid]/cmdline`` file and split it into arguments.

    :param directory: The absolute pathname of the numerical subdirectory of
                      ``/proc`` to get process information from (a string).
    :returns: A list of strings containing the tokenized command line. If the
              ``/proc/[pid]/cmdline`` file can't be read an empty list is
              returned (failures are handled and logged by
              :class:`ProtectedAccess`).
    """
    raw = ''
    with ProtectedAccess('cmdline', "read process command line"):
        with open(os.path.join(directory, 'cmdline')) as handle:
            raw = handle.read()
    # Drop the terminating null byte, otherwise every command line would end
    # in an empty argument (an encoding detail callers shouldn't care about).
    if raw.endswith('\0'):
        raw = raw[:-1]
    # Splitting an empty string gives a list with one empty string, which is
    # not a valid parsed command line, so that case is handled explicitly.
    if not raw:
        return []
    return raw.split('\0')
def uid_to_name(uid):
    """
    Translate a user ID into a username.

    :param uid: The user ID (an integer).
    :returns: The username (a string) or :data:`None` if :func:`pwd.getpwuid()`
              fails to locate a user for the given ID (any exception raised
              during the lookup results in :data:`None`).
    """
    try:
        account = pwd.getpwuid(uid)
        return account.pw_name
    except Exception:
        return None
def gid_to_name(gid):
    """
    Translate a group ID into a group name.

    :param gid: The group ID (an integer).
    :returns: The group name (a string) or :data:`None` if :func:`grp.getgrgid()`
              fails to locate a group for the given ID (any exception raised
              during the lookup results in :data:`None`).
    """
    try:
        group = grp.getgrgid(gid)
        return group.gr_name
    except Exception:
        return None
class ProtectedAccess(object):

    """
    Context manager that deals with permission errors and race conditions.

    Exceptions raised inside the ``with`` block are handled as follows:

    - Permission errors (``EACCES``) are silently swallowed.
    - Missing files (``ENOENT`` and ``ESRCH``) are considered race conditions:
      they are logged at debug level, counted in :data:`NUM_RACE_CONDITIONS`
      and swallowed.
    - Any other :exc:`~exceptions.Exception` is logged as a warning and
      swallowed.
    - Exceptions that don't inherit from :exc:`~exceptions.Exception` (like
      :exc:`~exceptions.KeyboardInterrupt` and :exc:`~exceptions.SystemExit`)
      are never swallowed, because they are requests to stop the program
      rather than failures to read process information.
    """

    def __init__(self, key, action):
        """
        Initialize a :class:`ProtectedAccess` object.

        :param key: The key in :data:`NUM_RACE_CONDITIONS` (a string).
        :param action: A verb followed by a noun describing what kind of access
                       is being protected (a string).
        """
        self.key = key
        self.action = action

    def __enter__(self):
        """Enter the context (does nothing)."""

    def __exit__(self, exc_type=None, exc_value=None, traceback=None):
        """
        Log and swallow exceptions and count race conditions.

        :returns: :data:`None` when no exception was raised, :data:`True` when
                  the exception was swallowed and :data:`False` when the
                  exception should propagate to the caller.
        """
        if exc_type is None:
            return None
        if not issubclass(exc_type, Exception):
            # Let KeyboardInterrupt, SystemExit and friends propagate.
            return False
        # Gotcha: On Python 2.6 when exc_type is IOError exc_value is
        #         an actual tuple instead of an exception object, hence
        #         the issubclass() and exc_value[0] gymnastics.
        filename = getattr(exc_value, 'filename', 'filename unknown')
        if issubclass(exc_type, EnvironmentError):
            error_code = getattr(exc_value, 'errno', None)
            if error_code is None:
                # Exception objects on Python 3 are not subscriptable (and a
                # tuple may be empty) so the fall back has to be guarded.
                try:
                    error_code = exc_value[0]
                except (TypeError, IndexError, KeyError):
                    error_code = None
            if error_code == errno.EACCES:
                # Permission errors are silently swallowed.
                return True
            if error_code in (errno.ENOENT, errno.ESRCH):
                # If the file has gone missing we consider it a race condition:
                #  - ENOENT is reported when /proc/[pid] disappears.
                #  - ESRCH is reported when /proc/[pid]/stat disappears.
                logger.debug("Failed to %s due to race condition! (%s)",
                             self.action, filename)
                NUM_RACE_CONDITIONS[self.key] += 1
                return True
        # Other exceptions are logged and swallowed.
        logger.warning("Failed to %s because of unexpected exception! (%s)",
                       self.action, filename, exc_info=(exc_type, exc_value, traceback))
        return True
# Define aliases for backwards compatibility.
# NOTE(review): define_aliases() comes from humanfriendly.deprecation; it
# presumably makes the old lowercase name resolve to the dotted path given as
# its value -- confirm against the humanfriendly documentation.
define_aliases(
    module_name=__name__,
    # In proc 1.0 the following module variable was renamed.
    num_race_conditions='proc.core.NUM_RACE_CONDITIONS',
)