Server IP : 184.154.167.98 / Your IP : 18.191.218.101 Web Server : Apache System : Linux pink.dnsnetservice.com 4.18.0-553.22.1.lve.1.el8.x86_64 #1 SMP Tue Oct 8 15:52:54 UTC 2024 x86_64 User : puertode ( 1767) PHP Version : 7.2.34 Disable Function : NONE MySQL : OFF | cURL : ON | WGET : ON | Perl : ON | Python : ON | Sudo : ON | Pkexec : ON Directory : /var/lib/pcp/config/pmie/ |
Upload File : |
// pmieconf-pmie 1 /var/lib/pcp/config/pmieconf // 1 dmthin.data_high_util enabled = yes // 1 dmthin.metadata_high_util enabled = yes // 1 primary.pmda_restart enabled = yes // end // // --- START GENERATED SECTION (do not change this section) --- // Auto-generated by pmieconf on: Mon Nov 4 14:12:53 2024 // // 1 cpu.context_switch delta = 2 min; cpu.context_switch = some_host ( kernel.all.pswitch > hinv.ncpu * 4000 count/sec ) -> syslog 10 min "High aggregate context switch rate" " %vctxsw/s@%h"; // 1 cpu.load_average delta = 2 min; cpu.load_average = some_host ( // threshold scales with the number of CPUs (works better for // large systems) and there is an absolute lower bound, // especially for small systems kernel.all.load #'1 minute' > hinv.ncpu * 3 && kernel.all.load #'1 minute' > 4 ) -> syslog 10 min "High 1-minute load average" " %vload@%h"; // 1 cpu.system delta = 2 min; cpu.system = some_host ( // first term is always true, but provides %v for actions ... ( 100 * kernel.all.cpu.sys / hinv.ncpu ) > 0 && 100 * ( kernel.all.cpu.user + kernel.all.cpu.sys ) > 70 * hinv.ncpu && 100 * kernel.all.cpu.sys / ( kernel.all.cpu.user + kernel.all.cpu.sys ) > 75 ) -> syslog 10 min "Busy executing in system mode" " %v%sys@%h"; // 1 cpu.util delta = 2 min; cpu.util = some_host ( 100 * ( kernel.all.cpu.user + kernel.all.cpu.sys + kernel.all.cpu.intr ) / hinv.ncpu > 90 ) -> syslog 10 min "High average processor utilization" " %v%util@%h"; // 1 dmthin.data_high_util delta = 10 mins; dmthin.data_high_util = some_host ( some_inst ( ( 100 * dmthin.pool.data.used / dmthin.pool.data.total ) > 95 ) ) -> syslog 10 min "dmthin pool data is filling up" " %v%used [%i]@%h"; // 1 dmthin.metadata_high_util delta = 10 mins; dmthin.metadata_high_util = some_host ( some_inst ( ( 100 * dmthin.pool.metadata.used / dmthin.pool.metadata.total ) > 80 ) ) -> syslog 10 min "dmthin pool metadata is filling up" " %v%used [%i]@%h"; // 1 entropy.available delta = 2 min; entropy.available = some_host ( ( 100 * kernel.all.entropy.avail / kernel.all.entropy.poolsize ) >= 0 && ( kernel.all.entropy.avail <= 150 ) ) -> syslog 10 min "Low random number entropy available" " %v%avail@%h"; // 1 filesys.filling delta = 4 mins; filesys.filling = some_host ( some_inst ( ( 100 * filesys.used / filesys.capacity ) > 95 && filesys.used + 20 min * ( rate filesys.used ) > filesys.capacity ) ) -> syslog 10 min "File system is filling up" " %v%used[%i]@%h"; // 1 memory.exhausted delta = 2 min; memory.exhausted = some_host ( ( avg_sample ( swap.pagesout @0..9 ) ) > 0 && 30 %_sample swap.pagesout @0..9 >= 5 ) -> syslog 10 min "Severe demand for real memory" " %vpgsout/s@%h"; // 1 memory.oom_kill delta = 2 min; memory.oom_kill = some_host ( mem.vmstat.oom_kill > 0 ) -> syslog 10 min "OOM kill" " %v/sec@%h"; // 1 mssql.stolen_node_memory delta = 2 min; mssql.stolen_node_memory = some_host ( ( avg_sample ( mssql.memory_manager.stolen_node_memory @0..9 ) ) > 0 && 60 %_sample ( 100 * ( mssql.memory_manager.stolen_node_memory @0..9 / mssql.memory_manager.total_server_memory @0..9 ) ) > 70 ) -> syslog 10 min "High mssql stolen node memory" " %v%mem@%h"; // 1 network.listenoverflows delta = 2 min; network.listenoverflows = some_host ( network.tcp.listenoverflows > 0 ) -> syslog 10 min "TCP listen overflows" " %v/sec@%h"; // 1 network.tcpqfulldocookies delta = 2 min; network.tcpqfulldocookies = some_host ( network.tcp.reqqfulldocookies > 0 ) -> syslog 10 min "TCP request queue full SYN cookie replies" " %v/sec@%h"; // 1 network.tcpqfulldrops delta = 2 min; network.tcpqfulldrops = some_host ( network.tcp.reqqfulldrop > 0 ) -> syslog 10 min "TCP request queue full drops" " %v/sec@%h"; // 1 per_cpu.many_util delta = 2 min; per_cpu.many_util = some_host ( hinv.ncpu > 12 && 80 %_inst ( 100 * ( kernel.percpu.cpu.user + kernel.percpu.cpu.sys + kernel.percpu.cpu.intr ) > 95 ) ) -> syslog 10 min "High number of saturated processors" " >80%CPUs@%h"; // 1 per_cpu.some_util delta = 2 min; per_cpu.some_util = some_host ( some_inst ( ( 100 * ( kernel.percpu.cpu.user + kernel.percpu.cpu.sys + kernel.percpu.cpu.intr ) ) > 95 ) && hinv.ncpu > 1 && hinv.ncpu <= 12 ) -> syslog 10 min "High per CPU processor utilization" " %v%util[%i]@%h"; // 1 per_cpu.system delta = 2 min; per_cpu.system = some_host ( some_inst ( // first term is always true, but provides %v for actions ... ( 100 * kernel.percpu.cpu.sys ) > 0 && 100 * ( kernel.percpu.cpu.user + kernel.percpu.cpu.sys ) > 75 && 100 * kernel.percpu.cpu.sys / ( kernel.percpu.cpu.user + kernel.percpu.cpu.sys ) > 80 ) && hinv.ncpu > 1 ) -> syslog 10 min "Some CPU busy executing in system mode" " %v%sys[%i]@%h"; // 1 per_disk.average_queue_length delta = 2 min; per_disk.average_queue_length = some_host ( some_inst ( ( disk.dev.aveq @0 > disk.dev.aveq @1 ) && ( disk.dev.avactive * 100 > 95 ) ) ) -> syslog 10 min "High per disk average queue length" " %vaveq[%i]@%h"; // 1 per_disk.average_wait_time delta = 2 min; per_disk.average_wait_time = some_host ( some_inst ( ( disk.dev.total_rawactive @0 / disk.dev.total @0 ) * 100 > ( disk.dev.total_rawactive @1 / disk.dev.total @1 ) * 100 && ( disk.dev.avactive * 100 > 95 ) ) ) -> syslog 10 min "High per disk average queue length" " %v%await[%i]@%h"; // 1 per_netif.collisions delta = 2 min; per_netif.collisions = some_inst ( 100 * network.interface.collisions / ( network.interface.collisions + network.interface.out.packets ) > 40 && network.interface.out.packets > 10 count/second ) -> syslog 10 min "High collision rate in packet sends" " %v%collisions[%i]@%h"; // 1 per_netif.errors delta = 2 min; per_netif.errors = some_inst ( network.interface.total.errors > 15 ) -> syslog 10 min "High network interface error rate" " %verr/s[%i]@%h"; // 1 per_netif.util delta = 2 min; per_netif.util = some_inst ( ( 100 * network.interface.total.bytes / network.interface.baudrate ) > 85 && network.interface.baudrate > 0 ) -> syslog 10 min "High network interface utilization" " %v%util[%i]@%h"; // 1 power.thermal_throttle delta = 2 min; power.thermal_throttle = some_host ( 100 * hinv.cpu.thermal_throttle.package.time > 50 ) -> syslog 10 min "CPU is experiencing thermal throttling" " %v%time[%i]@%h"; // 1 primary.pmda_restart delta = 2 min; primary.pmda_restart = some_inst ( pmcd.agent.status != 0 ) -> shell 10 min "pmsignal -s HUP -p pmcd" & syslog 10 min "Restart unresponsive PMDAs" " pmda%i[%v]@%h"; // --- END GENERATED SECTION (changes below will be preserved) ---