Server IP : 184.154.167.98 / Your IP : 3.133.117.95 Web Server : Apache System : Linux pink.dnsnetservice.com 4.18.0-553.22.1.lve.1.el8.x86_64 #1 SMP Tue Oct 8 15:52:54 UTC 2024 x86_64 User : puertode ( 1767) PHP Version : 7.2.34 Disable Function : NONE MySQL : OFF | cURL : ON | WGET : ON | Perl : ON | Python : ON | Sudo : ON | Pkexec : ON Directory : /usr/libexec/pcp/pmdas/bcc/modules/ |
Upload File : |
#
# Copyright (C) 2018 Andreas Gerstmayr <andreas@gerstmayr.me>
# Based on the profile BCC tool by Brendan Gregg:
# https://github.com/iovisor/bcc/blob/master/tools/profile.py
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
""" PCP BCC PMDA profile module """

# pylint: disable=invalid-name, too-many-branches, too-many-instance-attributes

from threading import Thread, Lock
from os import path
import errno
import json
from bcc import BPF, PerfType, PerfSWConfig

from pcp.pmapi import pmUnits
from cpmapi import PM_TYPE_STRING, PM_SEM_INSTANT
from cpmapi import PM_ERR_BADSTORE
from cpmda import PMDA_FETCH_NOVALUES

from modules.pcpbcc import PCPBCCBase

#
# BPF program
#
bpf_src = "modules/profile.bpf"

#
# PCP BCC PMDA constants
#
MODULE = 'profile'
METRIC = 'proc.profile'
units_none = pmUnits(0, 0, 0, 0, 0, 0)


class StackNode(object):
    """
    Node of the aggregated stack tree.

    Attributes:
        name: label of this frame.
        libtype: tag given at creation time ("user" or "kernel" for the
            nodes created in this file, None for the root).
        value: sample count accumulated through increment().
        children: child nodes keyed by their name.
        parents: list of ancestor nodes, outermost first.
    """
    def __init__(self, name, libtype=None):
        self.name = name
        self.libtype = libtype
        self.value = 0
        self.children = {}
        self.parents = []

    def lookup_or_init(self, name, libtype=None):
        """
        Return the child called name, creating it when it does not exist.

        libtype is only used when a new child is created; an existing
        child keeps the libtype it was created with.
        """
        if name in self.children:
            return self.children[name]
        new_node = StackNode(name, libtype)
        new_node.parents = self.parents + [self]
        self.children[name] = new_node
        return new_node

    def increment(self, n):
        """ Add n to this node and to every ancestor """
        self.value += n
        for parent in self.parents:
            parent.value += n


class StackNodeEncoder(json.JSONEncoder):
    """ JSON encoder that serializes StackNode trees """
    def default(self, o): # pylint: disable=method-hidden
        """
        Encode a StackNode as a dict; the encoder recurses into the
        children. Anything else is delegated to the base class, which
        raises TypeError for unsupported objects.
        """
        if isinstance(o, StackNode):
            return {"name": o.name,
                    "libtype": o.libtype,
                    "value": o.value,
                    "children": list(o.children.values())}
        return json.JSONEncoder.default(self, o)


#
# PCP BCC Module
#
class PCPBCCModule(PCPBCCBase):
    """
    PCP BCC profile module

    Profiling is driven through store(): "start" compiles and attaches the
    BPF program, "stop" detaches it and publishes the collected stacks as
    JSON.  self.status moves through "stopped" -> "starting" -> "started"
    -> "stopping" -> "stopped"; self.status and self.data are read and
    written under self.lock.

    NOTE(review): self.bpf, self.bpf_text, self.debug, self.items,
    self.config, self.log, self.err, get_proc_info() and update_pids() are
    not defined in this file and are presumably provided by PCPBCCBase.
    """
    def __init__(self, config, log, err, proc_refresh):
        """
        Constructor

        Reads the optional settings of the [profile] section: process,
        user_stacks_only, kernel_stacks_only, stack_storage_size,
        sample_frequency, sample_period and cpu.
        """
        PCPBCCBase.__init__(self, MODULE, config, log, err)

        self.pid = None
        self.proc_filter = None
        self.proc_refresh = proc_refresh
        self.user_stacks_only = False
        self.kernel_stacks_only = False
        self.stack_storage_size = 16384
        self.sample_frequency = 47
        self.sample_period = 1000000
        self.cpu = -1

        for opt in self.config.options(MODULE):
            if opt == 'process':
                self.proc_filter = self.config.get(MODULE, opt)
                self.update_pids(self.get_proc_info(self.proc_filter))
            elif opt == 'user_stacks_only':
                self.user_stacks_only = self.config.getboolean(MODULE, opt)
            elif opt == 'kernel_stacks_only':
                self.kernel_stacks_only = self.config.getboolean(MODULE, opt)
            elif opt == 'stack_storage_size':
                self.stack_storage_size = int(self.config.get(MODULE, opt))
            elif opt == 'sample_frequency':
                self.sample_frequency = int(self.config.get(MODULE, opt))
            elif opt == 'sample_period':
                self.sample_period = int(self.config.get(MODULE, opt))
            elif opt == 'cpu':
                self.cpu = int(self.config.get(MODULE, opt))

        self.lock = Lock()
        self.status = "stopped"
        self.data = "null"

        self.log("Initialized.")

    def metrics(self):
        """ Get metric definitions """
        name = METRIC
        self.items.append(
            # Name - reserved - type - semantics - units - help
            (name, None, PM_TYPE_STRING, PM_SEM_INSTANT, units_none, 'stack traces'),
        )
        return False, self.items

    def compile(self):
        """ Compile BPF """
        # This module compiles the BPF code on-the-fly if a profiling request is received

    def start_profiling(self):
        """
        Starts profiling

        Only acts when the current status is "stopped".  Loads and
        specializes the BPF source on first use, compiles it and attaches
        it to the software CPU clock perf event.  On failure the status is
        reset to "stopped" and the exception is re-raised.
        """
        with self.lock: # pylint: disable=not-context-manager
            if self.status != "stopped":
                self.err("Profiling must be stopped before, current status: %s." % self.status)
                return

            self.update_pids(self.get_proc_info(self.proc_filter))
            # NOTE(review): self.pid is only ever assigned None in this file;
            # presumably update_pids() sets it - verify against PCPBCCBase.
            if self.proc_filter and not self.pid:
                self.log("Nothing to profile, profiling not started.")
                return

            self.status = "starting"
            self.data = "null"

        self.log("Starting profiling...")
        try:
            if not self.bpf_text:
                with open(path.dirname(__file__) + '/../' + bpf_src) as src:
                    self.bpf_text = src.read()

                # Set stack storage size
                self.bpf_text = self.bpf_text.replace("STACK_STORAGE_SIZE",
                                                      str(self.stack_storage_size))

                # Handle stack args
                kernel_stack_get = "stack_traces.get_stackid(&ctx->regs, 0)"
                user_stack_get = "stack_traces.get_stackid(&ctx->regs, BPF_F_USER_STACK)"
                if self.user_stacks_only:
                    kernel_stack_get = "-1"
                elif self.kernel_stacks_only:
                    user_stack_get = "-1"
                self.bpf_text = self.bpf_text.replace("USER_STACK_GET", user_stack_get)
                self.bpf_text = self.bpf_text.replace("KERNEL_STACK_GET", kernel_stack_get)

            # A non-zero sample frequency takes precedence over the sample period
            sample_freq = 0
            sample_period = 0
            if self.sample_frequency:
                sample_freq = self.sample_frequency
            elif self.sample_period:
                sample_period = self.sample_period

            # The thread filter is substituted into a copy on every start,
            # the cached self.bpf_text keeps the THREAD_FILTER placeholder
            thread_filter = ("pid == %s" % self.pid) if self.pid else "1"
            bpf_text = self.bpf_text.replace("THREAD_FILTER", thread_filter)

            if self.debug:
                self.log("BPF to be compiled:\n" + bpf_text.strip())

            self.bpf = BPF(text=bpf_text)
            self.bpf.attach_perf_event(ev_type=PerfType.SOFTWARE,
                                       ev_config=PerfSWConfig.CPU_CLOCK,
                                       fn_name="do_perf_event",
                                       sample_period=sample_period,
                                       sample_freq=sample_freq,
                                       cpu=self.cpu)
            with self.lock: # pylint: disable=not-context-manager
                self.status = "started"
            self.log("Started profiling.")
        except Exception as error: # pylint: disable=broad-except
            self.bpf = None
            self.err(str(error))
            self.err("Failed to compile BPF code!")
            with self.lock: # pylint: disable=not-context-manager
                self.status = "stopped"
            raise

    @staticmethod
    def stack_id_err(stack_id):
        """ Helper method to detect stack id errors """
        # -EFAULT in get_stackid normally means the stack-trace is not available,
        # such as getting kernel stack trace in userspace code
        return (stack_id < 0) and (stack_id != -errno.EFAULT)

    @staticmethod
    def enforce_str(x):
        """ Decode bytes object """
        return x.decode() if isinstance(x, bytes) else x

    def _build_stack_tree(self):
        """
        Aggregate the BPF "counts" map into a StackNode tree.

        Returns a tuple (root_node, missing_stacks, has_enomem):
        root_node is the tree root, missing_stacks the number of stack ids
        reported as errors, has_enomem whether any of them was -ENOMEM.

        NOTE(review): the key fields used here (name, pid, kernel_ip,
        user_stack_id, kernel_stack_id) come from the BPF program, which is
        not part of this file.
        """
        missing_stacks = 0
        has_enomem = False
        counts = self.bpf["counts"]
        stack_traces = self.bpf["stack_traces"]
        root_node = StackNode("root")

        # Entries are processed in ascending sample count order, which
        # determines the insertion order of the children
        for k, v in sorted(counts.items(), key=lambda item: item[1].value):
            user_err = self.stack_id_err(k.user_stack_id)
            kernel_err = self.stack_id_err(k.kernel_stack_id)

            # Handle get_stackid errors
            if not self.user_stacks_only and kernel_err:
                missing_stacks += 1
                has_enomem = has_enomem or k.kernel_stack_id == -errno.ENOMEM
            if not self.kernel_stacks_only and user_err:
                missing_stacks += 1
                has_enomem = has_enomem or k.user_stack_id == -errno.ENOMEM

            user_stack = []
            if k.user_stack_id >= 0:
                user_stack = list(stack_traces.walk(k.user_stack_id))

            kernel_stack = []
            if k.kernel_stack_id >= 0:
                kernel_stack = list(stack_traces.walk(k.kernel_stack_id))
                # The later IP checking
                if k.kernel_ip:
                    kernel_stack.insert(0, k.kernel_ip)

            cur_node = root_node.lookup_or_init(self.enforce_str(k.name), libtype="user")

            # If we failed to get the stack id, such as due to no space (-ENOMEM) or
            # hash collision (-EEXIST), we still print a placeholder for consistency
            if not self.kernel_stacks_only:
                if user_err:
                    cur_node = cur_node.lookup_or_init("[Missed User Stack]", libtype="user")
                else:
                    # Stacks are walked innermost frame first, the tree is
                    # built from the outermost frame
                    for addr in reversed(user_stack):
                        cur_node = cur_node.lookup_or_init(
                            self.enforce_str(self.bpf.sym(addr, k.pid)), libtype="user")
            if not self.user_stacks_only:
                if kernel_err:
                    cur_node = cur_node.lookup_or_init("[Missed Kernel Stack]", libtype="kernel")
                else:
                    for addr in reversed(kernel_stack):
                        cur_node = cur_node.lookup_or_init(
                            self.enforce_str(self.bpf.ksym(addr)), libtype="kernel")
            cur_node.increment(v.value)

        return root_node, missing_stacks, has_enomem

    def stop_profiling(self):
        """
        Stops profiling and generates JSON of stacktraces

        Only acts when the current status is "started".  Whatever happens
        while the stacks are processed, the base class cleanup is invoked
        and the status returns to "stopped", so a failed stop cannot leave
        the module stuck in "stopping".  On failure self.data is "null" and
        the exception is re-raised.
        """
        with self.lock: # pylint: disable=not-context-manager
            if self.status != "started":
                self.err("Please start profiling first, current status: %s." % self.status)
                return
            self.status = "stopping"

        self.log("Stopping profiling...")
        data = "null"
        try:
            root_node, missing_stacks, has_enomem = self._build_stack_tree()

            # Check missing
            if missing_stacks > 0:
                enomem_str = "" if not has_enomem else " Consider increasing stack_storage_size."
                self.log("WARNING: %d stack traces could not be displayed.%s" %
                         (missing_stacks, enomem_str))

            data = json.dumps(root_node, cls=StackNodeEncoder)
        except Exception as error: # pylint: disable=broad-except
            self.err(str(error))
            self.err("Failed to process stack traces!")
            raise
        finally:
            try:
                super(PCPBCCModule, self).cleanup()
            finally:
                # Publish result and state together, even if cleanup failed
                with self.lock: # pylint: disable=not-context-manager
                    self.data = data
                    self.status = "stopped"
        self.log("Stopped profiling.")

    def refresh(self):
        """ Refresh BPF data """
        return

    def bpfdata(self, item, inst):
        """
        Return BPF data as PCP metric value

        The value is a JSON document with the current status and the data
        of the last completed profiling run ("null" if there is none).
        """
        try:
            with self.lock: # pylint: disable=not-context-manager
                value = '{"status": "%s", "data": %s}' % (self.status, self.data)
            return [value, 1]
        except Exception: # pylint: disable=broad-except
            return [PMDA_FETCH_NOVALUES, 0]

    def store(self, _item, _inst, val):
        """
        Start/stop profiling

        "start" and "stop" are handled in a daemon thread, so this method
        returns 0 right away.  Any other value is rejected with
        PM_ERR_BADSTORE.
        """
        if val == "start":
            thread = Thread(target=self.start_profiling)
            thread.daemon = True
            thread.start()
        elif val == "stop":
            thread = Thread(target=self.stop_profiling)
            thread.daemon = True
            thread.start()
        else:
            # %-formatting keeps reporting the error for non-string values
            self.err("Received invalid command: %s" % (val,))
            return PM_ERR_BADSTORE
        return 0

    def cleanup(self):
        """ Clean up at exit """
        # This module cleans up the BPF code on-the-fly when profiling request is stopped