Message ID | 20250311203034.8065-3-jwyatt@redhat.com |
---|---|
State | New |
Headers | show |
Series | Add cpupower idle-state functionality | expand |
On Tue, Mar 11, 2025 at 04:30:19PM -0400, John B. Wyatt IV wrote: > Allows Tuna to control cpu idle-state functionality on the system, > including querying, enabling, disabling of cpu idle-states to control > power usage or to test functionality. > > This requires cpupower, a utility in the Linux kernel repository and > the cpupower Python bindings added in Linux 6.12 to control cpu > idle-states. > > This patch revision includes text snippet suggestions by Crystal Wood > (v2) and small Python suggestions by John Kacur (v3 and v4). > > Suggested-by: Crystal Wood <crwood@redhat.com> > Suggested-by: John Kacur <jkacur@redhat.com> > > Signed-off-by: John B. Wyatt IV <jwyatt@redhat.com> > Signed-off-by: John B. Wyatt IV <sageofredondo@gmail.com> > --- > tuna-cmd.py | 30 +++++++- > tuna/cpupower.py | 184 +++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 211 insertions(+), 3 deletions(-) > create mode 100755 tuna/cpupower.py > > diff --git a/tuna-cmd.py b/tuna-cmd.py > index d0323f5..96a25a5 100755 > --- a/tuna-cmd.py > +++ b/tuna-cmd.py > @@ -25,6 +25,7 @@ from tuna import tuna, sysfs, utils > import logging > import time > import shutil > +import tuna.cpupower as cpw > > def get_loglevel(level): > if level.isdigit() and int(level) in range(0,5): > @@ -115,8 +116,12 @@ def gen_parser(): > "disable_perf": dict(action='store_true', help="Explicitly disable usage of perf in GUI for process view"), > "refresh": dict(default=2500, metavar='MSEC', type=int, help="Refresh the GUI every MSEC milliseconds"), > "priority": dict(default=(None, None), metavar="POLICY:RTPRIO", type=tuna.get_policy_and_rtprio, help="Set thread scheduler tunables: POLICY and RTPRIO"), > - "background": dict(action='store_true', help="Run command as background task") > - } > + "background": dict(action='store_true', help="Run command as background task"), > + "idle_info": dict(dest='idle_info', action='store_const', const=True, help='Print general idle information for the selected CPUs, including index values for IDLE-STATE.'), > + "idle_state_disabled_status": dict(dest='idle_state_disabled_status', metavar='IDLE-STATE', type=int, help='Print whether IDLE-STATE is enabled on the selected CPUs.'), > + "disable_idle_state": dict(dest='disable_idle_state', metavar='IDLE-STATE', type=int, help='Disable IDLE-STATE on the selected CPUs.'), > + "enable_idle_state": dict(dest='enable_idle_state', metavar='IDLE-STATE', type=int, help='Enable IDLE-STATE on the selected CPUs.') > + } > > parser = HelpMessageParser(description="tuna - Application Tuning Program") > > @@ -127,6 +132,9 @@ def gen_parser(): > > subparser = parser.add_subparsers(dest='command') > > + idle_set = subparser.add_parser('idle_set', > + description='Manage CPU idle state disabling (requires libcpupower and it\'s Python bindings)', > + help='Set all idle states on a given CPU-LIST.') > isolate = subparser.add_parser('isolate', description="Move all allowed threads and IRQs away from CPU-LIST", > help="Move all allowed threads and IRQs away from CPU-LIST") > include = subparser.add_parser('include', description="Allow all threads to run on CPU-LIST", > @@ -146,7 +154,6 @@ def gen_parser(): > show_threads = subparser.add_parser('show_threads', description='Show thread list', help='Show thread list') > show_irqs = subparser.add_parser('show_irqs', description='Show IRQ list', help='Show IRQ list') > show_configs = subparser.add_parser('show_configs', description='List preloaded profiles', help='List preloaded profiles') > - > what_is = subparser.add_parser('what_is', description='Provides help about selected entities', help='Provides help about selected entities') > gui = subparser.add_parser('gui', description="Start the GUI", help="Start the GUI") > > @@ -218,6 +225,13 @@ def gen_parser(): > show_irqs_group.add_argument('-S', '--sockets', **MODS['sockets']) > show_irqs.add_argument('-q', '--irqs', **MODS['irqs']) > > + idle_set_group = idle_set.add_mutually_exclusive_group(required=True) > + idle_set_group.add_argument('-i', '--idle-info', **MODS['idle_info']) > + idle_set_group.add_argument('-s', '--status', **MODS['idle_state_disabled_status']) > + idle_set_group.add_argument('-d', '--disable', **MODS['disable_idle_state']) > + idle_set_group.add_argument('-e', '--enable', **MODS['enable_idle_state']) > + idle_set.add_argument('-c', '--cpus', **MODS['cpus']) > + > what_is.add_argument('thread_list', **POS['thread_list']) > > gui.add_argument('-d', '--disable_perf', **MODS['disable_perf']) > @@ -647,6 +661,16 @@ def main(): > print("Valid log levels: NOTSET, DEBUG, INFO, WARNING, ERROR") > print("Log levels may be specified numerically (0-4)\n") > > + if args.command == 'idle_set': > + if not cpw.have_cpupower: > + print(f"Error: libcpupower bindings are not detected; please install libcpupower bindings from at least kernel {cpw.cpupower_required_kernel}.") Since it is an error message, print() should be called with "file=sys.stderr" > + sys.exit(1) > + > + my_cpupower = cpw.Cpupower(args.cpu_list) > + ret = my_cpupower.idle_set_handler(args) > + if ret > 0: > + sys.exit(ret) > + > if 'irq_list' in vars(args): > ps = procfs.pidstats() > if tuna.has_threaded_irqs(ps): > diff --git a/tuna/cpupower.py b/tuna/cpupower.py > new file mode 100755 > index 0000000..7913027 > --- /dev/null > +++ b/tuna/cpupower.py > @@ -0,0 +1,184 @@ > +# SPDX-License-Identifier: GPL-2.0-only > +# Copyright (C) 2024 John B. Wyatt IV > + > +from typing import List > +import tuna.utils as utils > + > +cpupower_required_kernel = "6.12" > +have_cpupower = None > + > +try: > + import raw_pylibcpupower as lcpw > + lcpw.cpufreq_get_available_frequencies(0) > + have_cpupower = True > +except: Catch all statements in Python are bad IMO, because they catch syntax errors as well. We should at least print the exception. > + lcpw = None > + have_cpupower = False > + > +if have_cpupower: > + class Cpupower: > + """The Cpupower class allows you to query and change the power states of the > + cpu. > + > + You may query or change the cpus all at once or a list of the cpus provided to the constructor's cpulist argument. > + > + The bindings must be detected on the $PYTHONPATH variable. > + > + You must use have_cpupower variable to determine if the bindings were > + detected in your code.""" > + > + LCPW_ERROR_TWO_CASE = 1 # enum for common error messages > + LCPW_ERROR_THREE_CASE = 2 > + > + def __init__(self, cpu_list=None): > + if cpu_list == None or cpu_list == []: # args.cpu_list == [] when the -c option is not used Nit: use the "is" operator to compare against None. > + self.__cpu_list = utils.get_all_cpu_list() > + else: > + self.__cpu_list = cpu_list > + > + @staticmethod > + def handle_common_lcpw_errors(e, error_type, idle_name): > + match e: > + case 0: > + pass > + case -1: > + print(f"Idlestate {idle_name} not available") > + case -2: > + print("Disabling is not supported by the kernel") > + case -3: > + if error_type == Cpupower.LCPW_ERROR_THREE_CASE: > + print("No write access to disable/enable C-states: try using sudo") > + else: > + print(f"Not documented: {e}") > + case _: > + print(f"Not documented: {e}") Error message should go to stderr by adding "file=sys.stderr" to the print() statements. > + > + @staticmethod > + def get_idle_states(cpu): > + """ > + Get the c-states of a cpu. > + > + You can capture the return values with: > + states_list, states_amt = get_idle_states() > + > + Returns > + List[String]: list of cstates > + Int: amt of cstates > + """ > + ret = [] > + for cstate in range(lcpw.cpuidle_state_count(cpu)): > + ret.append(lcpw.cpuidle_state_name(cpu,cstate)) > + return ret, lcpw.cpuidle_state_count(cpu) > + > + @staticmethod > + def get_idle_info(cpu=0): > + idle_states, idle_states_amt = Cpupower.get_idle_states(cpu) > + idle_states_list = [] > + for idle_state in range(0, len(idle_states)): > + idle_states_list.append( > + { > + "CPU ID": cpu, > + "Idle State Name": idle_states[idle_state], > + "Flags/Description": lcpw.cpuidle_state_desc(cpu, idle_state), > + "Latency": lcpw.cpuidle_state_latency(cpu, idle_state), > + "Usage": lcpw.cpuidle_state_usage(cpu, idle_state), > + "Duration": lcpw.cpuidle_state_time(cpu, idle_state) > + } > + ) > + idle_info = { > + "CPUidle-driver": lcpw.cpuidle_get_driver(), > + "CPUidle-governor": lcpw.cpuidle_get_governor(), > + "idle-states-count": idle_states_amt, > + "available-idle-states": idle_states, > + "cpu-states": idle_states_list > + } > + return idle_info > + > + @staticmethod > + def print_idle_info(cpu_list=[0]): > + for cpu in cpu_list: > + idle_info = Cpupower.get_idle_info(cpu) > + print_str = f""" > +CPUidle driver: {idle_info["CPUidle-driver"]} > +CPUidle governor: {idle_info["CPUidle-governor"]} > +analyzing CPU {cpu} > + > +Number of idle states: {idle_info["idle-states-count"]} > +Available idle states: {idle_info["available-idle-states"]} > +""" > + for state in idle_info["cpu-states"]: > + print_str += f"""{state["Idle State Name"]} > +Flags/Description: {state["Flags/Description"]} > +Latency: {state["Latency"]} > +Usage: {state["Usage"]} > +Duration: {state["Duration"]} > +""" > + print( > + print_str > + ) nit: I don't think we need three lines for this statement. > + > + def idle_set_handler(self, args) -> int: > + if args.idle_state_disabled_status != None: > + cstate_index = args.idle_state_disabled_status > + cstate_list, cstate_amt = self.get_idle_states(self.__cpu_list[0]) # Assuming all cpus have the same idle state > + if cstate_index < 0 or cstate_index >= cstate_amt: > + print(f"Invalid idle state range. Total for this cpu is {cstate_amt}") Nit: redirect error message to stderr. > + return 1 > + cstate_name = cstate_list[cstate_index] > + ret = self.is_disabled_idle_state(cstate_index) > + for i,e in enumerate(ret): > + if e == 1: > + print(f"CPU: {self.__cpu_list[i]} Idle state \"{cstate_name}\" is disabled.") > + elif e == 0: > + print(f"CPU: {self.__cpu_list[i]} Idle state \"{cstate_name}\" is enabled.") > + else: > + self.handle_common_lcpw_errors(e, self.LCPW_ERROR_TWO_CASE, cstate_name) > + elif args.idle_info != None: nit: Use the "is" operator to compare against None. > + self.print_idle_info(self.__cpu_list) > + return 0 > + elif args.disable_idle_state != None: Ditto. > + cstate_index = args.disable_idle_state > + cstate_list, cstate_amt = self.get_idle_states(self.__cpu_list[0]) # Assuming all cpus have the same idle state > + if cstate_index < 0 or cstate_index >= cstate_amt: > + print(f"Invalid idle state range. Total for this cpu is {cstate_amt}") Nit: redirect error message to stderr. > + return 1 > + cstate_name = cstate_list[cstate_index] > + ret = self.disable_idle_state(cstate_index, 1) > + for e in ret: > + self.handle_common_lcpw_errors(e, self.LCPW_ERROR_THREE_CASE, cstate_name) > + elif args.enable_idle_state != None: nit: Use the "is" operator to compare against None. > + cstate_index = args.enable_idle_state > + cstate_list, cstate_amt = self.get_idle_states(self.__cpu_list[0]) # Assuming all cpus have the same idle state > + if cstate_index < 0 or cstate_index >= cstate_amt: > + print(f"Invalid idle state range. Total for this cpu is {cstate_amt}") nit: Use the "is" operator to compare against None. > + return 1 > + cstate_name = cstate_list[cstate_index] > + ret = self.disable_idle_state(cstate_index, 0) > + for e in ret: > + self.handle_common_lcpw_errors(e, self.LCPW_ERROR_THREE_CASE, cstate_name) > + return 0 > + > + def disable_idle_state(self, state, disabled) -> List[int]: > + """ > + Disable or enable an idle state using the object's stored list of cpus. > + > + Args: > + state (int): The cpu idle state index to disable or enable as an int starting from 0. > + disabled (int): set to 1 to disable or 0 to enable. Less than 0 is an error. > + """ > + ret = [] > + for cpu in self.__cpu_list: > + ret.append(lcpw.cpuidle_state_disable(cpu, state, disabled)) > + return ret > + > + def is_disabled_idle_state(self, state) -> List[int]: > + """ > + Query the idle state. > + > + Args: > + state: The cpu idle state. 1 is disabled, 0 is enabled. Less than 0 is an error. > + """ > + ret = [] > + for cpu in self.__cpu_list: > + ret.append(lcpw.cpuidle_is_state_disabled(cpu, state)) > + return ret > -- > 2.48.1 >
On Wed, 2025-03-12 at 11:41 -0300, Wander Lairson Costa wrote: > On Tue, Mar 11, 2025 at 04:30:19PM -0400, John B. Wyatt IV wrote: > > Allows Tuna to control cpu idle-state functionality on the system, > > including querying, enabling, disabling of cpu idle-states to control > > power usage or to test functionality. > > > > This requires cpupower, a utility in the Linux kernel repository and > > the cpupower Python bindings added in Linux 6.12 to control cpu > > idle-states. > > > > This patch revision includes text snippet suggestions by Crystal Wood > > (v2) and small Python suggestions by John Kacur (v3 and v4). > > > > Suggested-by: Crystal Wood <crwood@redhat.com> Eh, this tag makes it look like I suggested the overall idea, not that I gave feedback on the patch... > > @@ -115,8 +116,12 @@ def gen_parser(): > > "disable_perf": dict(action='store_true', help="Explicitly disable usage of perf in GUI for process view"), > > "refresh": dict(default=2500, metavar='MSEC', type=int, help="Refresh the GUI every MSEC milliseconds"), > > "priority": dict(default=(None, None), metavar="POLICY:RTPRIO", type=tuna.get_policy_and_rtprio, help="Set thread scheduler tunables: POLICY and RTPRIO"), > > - "background": dict(action='store_true', help="Run command as background task") > > - } > > + "background": dict(action='store_true', help="Run command as background task"), > > + "idle_info": dict(dest='idle_info', action='store_const', const=True, help='Print general idle information for the selected CPUs, including index values for IDLE-STATE.'), > > + "idle_state_disabled_status": dict(dest='idle_state_disabled_status', metavar='IDLE-STATE', type=int, help='Print whether IDLE-STATE is enabled on the selected CPUs.'), You have a tab between "selected" and "CPUs". > > + "disable_idle_state": dict(dest='disable_idle_state', metavar='IDLE-STATE', type=int, help='Disable IDLE-STATE on the selected CPUs.'), > > + "enable_idle_state": dict(dest='enable_idle_state', metavar='IDLE-STATE', type=int, help='Enable IDLE-STATE on the selected CPUs.') > > + } > > > > parser = HelpMessageParser(description="tuna - Application Tuning Program") > > > > @@ -127,6 +132,9 @@ def gen_parser(): > > > > subparser = parser.add_subparsers(dest='command') > > > > + idle_set = subparser.add_parser('idle_set', > > + description='Manage CPU idle state disabling (requires libcpupower and it\'s Python bindings)', > > + help='Set all idle states on a given CPU-LIST.') s/Set all idle states/Manage idle states/ I still don't like the "idle_set" name. This does more than setting, unlike the standalone utility that we're apparently reimplementing. > > diff --git a/tuna/cpupower.py b/tuna/cpupower.py > > new file mode 100755 > > index 0000000..7913027 > > --- /dev/null > > +++ b/tuna/cpupower.py > > @@ -0,0 +1,184 @@ > > +# SPDX-License-Identifier: GPL-2.0-only > > +# Copyright (C) 2024 John B. Wyatt IV > > + > > +from typing import List > > +import tuna.utils as utils > > + > > +cpupower_required_kernel = "6.12" > > +have_cpupower = None > > + > > +try: > > + import raw_pylibcpupower as lcpw > > + lcpw.cpufreq_get_available_frequencies(0) > > + have_cpupower = True > > +except: > > Catch all statements in Python are bad IMO, because they catch syntax > errors as well. We should at least print the exception. The (expected) exception is for the case where the package is not installed and thus we disable that feature. We don't want to print anything in that case (unless the user tries to use these features). But yeah, being more specific in the catch makes sense. > > + def __init__(self, cpu_list=None): > > + if cpu_list == None or cpu_list == []: # args.cpu_list == [] when the -c option is not used > > Nit: use the "is" operator to compare against None. Or just use "if cpu_list:" like the existing code does. That's also pretty obnoxious behavior of the code that sets args.cpu_list... > > + @staticmethod > > + def print_idle_info(cpu_list=[0]): This default is never used. cpu_list is only ever set to self.__cpu_list. Why is this a static method? > > + for cpu in cpu_list: > > + idle_info = Cpupower.get_idle_info(cpu) > > + print_str = f""" > > +CPUidle driver: {idle_info["CPUidle-driver"]} > > +CPUidle governor: {idle_info["CPUidle-governor"]} > > +analyzing CPU {cpu} > > + > > +Number of idle states: {idle_info["idle-states-count"]} > > +Available idle states: {idle_info["available-idle-states"]} > > +""" > > + for state in idle_info["cpu-states"]: > > + print_str += f"""{state["Idle State Name"]} > > +Flags/Description: {state["Flags/Description"]} > > +Latency: {state["Latency"]} > > +Usage: {state["Usage"]} > > +Duration: {state["Duration"]} > > +""" > > + print( > > + print_str > > + ) > > nit: I don't think we need three lines for this statement. Why not just print each state inside the loop, instead of building one big string? > > + elif args.enable_idle_state != None: > > nit: Use the "is" operator to compare against None. > > > + cstate_index = args.enable_idle_state > > + cstate_list, cstate_amt = self.get_idle_states(self.__cpu_list[0]) # Assuming all cpus have the same idle state > > + if cstate_index < 0 or cstate_index >= cstate_amt: > > + print(f"Invalid idle state range. Total for this cpu is {cstate_amt}") > > nit: Use the "is" operator to compare against None. > > > + return 1 > > + cstate_name = cstate_list[cstate_index] > > + ret = self.disable_idle_state(cstate_index, 0) > > + for e in ret: > > + self.handle_common_lcpw_errors(e, self.LCPW_ERROR_THREE_CASE, cstate_name) Why isn't self.disable_idle_state() doing the error handling (which is still duplicated between enable and disable)? And as I mentioned before, isn't libcpupower itself doing the same range checking? It looks like the get_idle_states() call is just for that redundant check, and to print the name in an error... You could wait until you actually get an error, and then just call lcpw.cpuidle_state_name on the particular cpu and state id. That would get rid of the assumption about all cpus having the same idle states, and be simpler. -Crystal
On Thu, 13 Mar 2025, Crystal Wood wrote: > On Wed, 2025-03-12 at 11:41 -0300, Wander Lairson Costa wrote: > > On Tue, Mar 11, 2025 at 04:30:19PM -0400, John B. Wyatt IV wrote: > > > Allows Tuna to control cpu idle-state functionality on the system, > > > including querying, enabling, disabling of cpu idle-states to control > > > power usage or to test functionality. > > > > > > This requires cpupower, a utility in the Linux kernel repository and > > > the cpupower Python bindings added in Linux 6.12 to control cpu > > > idle-states. > > > > > > This patch revision includes text snippet suggestions by Crystal Wood > > > (v2) and small Python suggestions by John Kacur (v3 and v4). > > > > > > Suggested-by: Crystal Wood <crwood@redhat.com> > Eh, this tag makes it look like I suggested the overall idea, not that I > gave feedback on the patch... Good point, I can remove it. > > > > @@ -115,8 +116,12 @@ def gen_parser(): > > > "disable_perf": dict(action='store_true', help="Explicitly disable usage of perf in GUI for process view"), > > > "refresh": dict(default=2500, metavar='MSEC', type=int, help="Refresh the GUI every MSEC milliseconds"), > > > "priority": dict(default=(None, None), metavar="POLICY:RTPRIO", type=tuna.get_policy_and_rtprio, help="Set thread scheduler tunables: POLICY and RTPRIO"), > > > - "background": dict(action='store_true', help="Run command as background task") > > > - } > > > + "background": dict(action='store_true', help="Run command as background task"), > > > + "idle_info": dict(dest='idle_info', action='store_const', const=True, help='Print general idle information for the selected CPUs, including index values for IDLE-STATE.'), > > > + "idle_state_disabled_status": dict(dest='idle_state_disabled_status', metavar='IDLE-STATE', type=int, help='Print whether IDLE-STATE is enabled on the selected CPUs.'), > > You have a tab between "selected" and "CPUs". Nice catch, thanks > > > > + "disable_idle_state": dict(dest='disable_idle_state', metavar='IDLE-STATE', type=int, help='Disable IDLE-STATE on the selected CPUs.'), > > > + "enable_idle_state": dict(dest='enable_idle_state', metavar='IDLE-STATE', type=int, help='Enable IDLE-STATE on the selected CPUs.') > > > + } > > > > > > parser = HelpMessageParser(description="tuna - Application Tuning Program") > > > > > > @@ -127,6 +132,9 @@ def gen_parser(): > > > > > > subparser = parser.add_subparsers(dest='command') > > > > > > + idle_set = subparser.add_parser('idle_set', > > > + description='Manage CPU idle state disabling (requires libcpupower and it\'s Python bindings)', > > > + help='Set all idle states on a given CPU-LIST.') > > s/Set all idle states/Manage idle states/ > > I still don't like the "idle_set" name. This does more than setting, > unlike the standalone utility that we're apparently reimplementing. I agree that idle_set is not the ideal name since if you look at the cpupower tool, this is just one function, maybe cpupower would be a better name. It's a bit unfair to say we're reimplementing the tool. The functionality is in upstream kernel code and John Wyatt created python bindings for that, and that is what he is using here. I think it is better to have all the tools for manipulating the environment through one tool and not make the user go search for multiple tools to get the job done. > > > > diff --git a/tuna/cpupower.py b/tuna/cpupower.py > > > new file mode 100755 > > > index 0000000..7913027 > > > --- /dev/null > > > +++ b/tuna/cpupower.py > > > @@ -0,0 +1,184 @@ > > > +# SPDX-License-Identifier: GPL-2.0-only > > > +# Copyright (C) 2024 John B. Wyatt IV > > > + > > > +from typing import List > > > +import tuna.utils as utils > > > + > > > +cpupower_required_kernel = "6.12" > > > +have_cpupower = None > > > + > > > +try: > > > + import raw_pylibcpupower as lcpw > > > + lcpw.cpufreq_get_available_frequencies(0) > > > + have_cpupower = True > > > +except: > > > > Catch all statements in Python are bad IMO, because they catch syntax > > errors as well. We should at least print the exception. Yes, that could be improved > > The (expected) exception is for the case where the package is not > installed and thus we disable that feature. We don't want to print > anything in that case (unless the user tries to use these features). > But yeah, being more specific in the catch makes sense. > > > > + def __init__(self, cpu_list=None): > > > + if cpu_list == None or cpu_list == []: # args.cpu_list == [] when the -c option is not used > > > > Nit: use the "is" operator to compare against None. These nits from a previous message are valid, but of course not critical, they can be fixed in subsequent patches. > > Or just use "if cpu_list:" like the existing code does. > > That's also pretty obnoxious behavior of the code that sets > args.cpu_list... > > > > + @staticmethod > > > + def print_idle_info(cpu_list=[0]): > > This default is never used. cpu_list is only ever set to > self.__cpu_list. Why is this a static method? This is a different cpu_list than the instantiated one. This is which cpus the user wants the info from. I think the name is confusing here. > > > > + for cpu in cpu_list: > > > + idle_info = Cpupower.get_idle_info(cpu) > > > + print_str = f""" > > > +CPUidle driver: {idle_info["CPUidle-driver"]} > > > +CPUidle governor: {idle_info["CPUidle-governor"]} > > > +analyzing CPU {cpu} > > > + > > > +Number of idle states: {idle_info["idle-states-count"]} > > > +Available idle states: {idle_info["available-idle-states"]} > > > +""" > > > + for state in idle_info["cpu-states"]: > > > + print_str += f"""{state["Idle State Name"]} > > > +Flags/Description: {state["Flags/Description"]} > > > +Latency: {state["Latency"]} > > > +Usage: {state["Usage"]} > > > +Duration: {state["Duration"]} > > > +""" > > > + print( > > > + print_str > > > + ) > > > > nit: I don't think we need three lines for this statement. > > Why not just print each state inside the loop, instead of building one > big string? valid, but I think John Wyatt was thinking the string could be useful for other purposes. > > > > + elif args.enable_idle_state != None: > > > > nit: Use the "is" operator to compare against None. > > > > > + cstate_index = args.enable_idle_state > > > + cstate_list, cstate_amt = self.get_idle_states(self.__cpu_list[0]) # Assuming all cpus have the same idle state > > > + if cstate_index < 0 or cstate_index >= cstate_amt: > > > + print(f"Invalid idle state range. Total for this cpu is {cstate_amt}") > > > > nit: Use the "is" operator to compare against None. > > > > > + return 1 > > > + cstate_name = cstate_list[cstate_index] > > > + ret = self.disable_idle_state(cstate_index, 0) > > > + for e in ret: > > > + self.handle_common_lcpw_errors(e, self.LCPW_ERROR_THREE_CASE, cstate_name) > > Why isn't self.disable_idle_state() doing the error handling (which is > still duplicated between enable and disable)? And as I mentioned > before, isn't libcpupower itself doing the same range checking? > > It looks like the get_idle_states() call is just for that redundant > check, and to print the name in an error... You could wait until you > actually get an error, and then just call lcpw.cpuidle_state_name on > the particular cpu and state id. That would get rid of the assumption > about all cpus having the same idle states, and be simpler. > > -Crystal > The code is functioning, it can be improved, let's fix a few more problems and let it in, and then it can be improved incrementally after that. John Kacur
On Fri, 2025-04-04 at 16:36 -0400, John Kacur wrote: > > On Thu, 13 Mar 2025, Crystal Wood wrote: > > > On Wed, 2025-03-12 at 11:41 -0300, Wander Lairson Costa wrote: > > > On Tue, Mar 11, 2025 at 04:30:19PM -0400, John B. Wyatt IV wrote: > > > > + "disable_idle_state": dict(dest='disable_idle_state', metavar='IDLE-STATE', type=int, help='Disable IDLE-STATE on the selected CPUs.'), > > > > + "enable_idle_state": dict(dest='enable_idle_state', metavar='IDLE-STATE', type=int, help='Enable IDLE-STATE on the selected CPUs.') > > > > + } > > > > > > > > parser = HelpMessageParser(description="tuna - Application Tuning Program") > > > > > > > > @@ -127,6 +132,9 @@ def gen_parser(): > > > > > > > > subparser = parser.add_subparsers(dest='command') > > > > > > > > + idle_set = subparser.add_parser('idle_set', > > > > + description='Manage CPU idle state disabling (requires libcpupower and it\'s Python bindings)', > > > > + help='Set all idle states on a given CPU-LIST.') s/it\'s/its/ > > > > s/Set all idle states/Manage idle states/ > > > > I still don't like the "idle_set" name. This does more than setting, > > unlike the standalone utility that we're apparently reimplementing. > > I agree that idle_set is not the ideal name since if you look at the > cpupower tool, this is just one function, maybe cpupower would be a > better name. It's a bit unfair to say we're reimplementing the tool. > The functionality is in upstream kernel code and John Wyatt created > python bindings for that, and that is what he is using here. But (as of now) this isn't using the python bindings to implement something higher-level; it's exposing them as a command line tool that has very similar functionality to the existing one. I'm not sure what being written in Python has to do with it, other than a motive for reimplementing, to make future features easier. > I think it is better to have all the tools for manipulating the > environment through one tool and not make the user go search for > multiple tools to get the job done. "all the tools for manipulating the environment" is an extremely broad mission... :-P So far it doesn't seem to align very well with the rest of what tuna does, which is managing processes/threads... is there a plan to tie them together so that you can have it set idle states based on what the tasks running on a given cpu need, and/or move tasks around based on that? > > Or just use "if cpu_list:" like the existing code does. > > > > That's also pretty obnoxious behavior of the code that sets > > args.cpu_list... > > > > > > + @staticmethod > > > > + def print_idle_info(cpu_list=[0]): > > > > This default is never used. cpu_list is only ever set to > > self.__cpu_list. Why is this a static method? > > This is a different cpu_list than the instantiated one. > This is which cpus the user wants the info from. I think > the name is confusing here. The only call (so far...) is literally "self.print_idle_info(self.__cpu_list)". Where are you seeing a different cpu list? When (even with future patches) would the instantiated cpu list be anything other than what the user asked for, and does that say anything about whether the class design makes sense? > > > > + for cpu in cpu_list: > > > > + idle_info = Cpupower.get_idle_info(cpu) > > > > + print_str = f""" > > > > +CPUidle driver: {idle_info["CPUidle-driver"]} > > > > +CPUidle governor: {idle_info["CPUidle-governor"]} > > > > +analyzing CPU {cpu} > > > > + > > > > +Number of idle states: {idle_info["idle-states-count"]} > > > > +Available idle states: {idle_info["available-idle-states"]} > > > > +""" > > > > + for state in idle_info["cpu-states"]: > > > > + print_str += f"""{state["Idle State Name"]} > > > > +Flags/Description: {state["Flags/Description"]} > > > > +Latency: {state["Latency"]} > > > > +Usage: {state["Usage"]} > > > > +Duration: {state["Duration"]} > > > > +""" > > > > + print( > > > > + print_str > > > > + ) > > > > > > nit: I don't think we need three lines for this statement. > > > > Why not just print each state inside the loop, instead of building one > > big string? > > valid, but I think John Wyatt was thinking the string could be > useful for other purposes. The json stuff I assume? Is this even in the right format for that? Is there any plan to json-ize other tuna functionality? It's really hard to review patches when there are vague or unstated notions of "this could be useful in the future"... especially when recent experience with rteval has highlighted the importance of avoiding overengineering. -Crystal
On Fri, 4 Apr 2025, Crystal Wood wrote: > On Fri, 2025-04-04 at 16:36 -0400, John Kacur wrote: > > > > On Thu, 13 Mar 2025, Crystal Wood wrote: > > > > > On Wed, 2025-03-12 at 11:41 -0300, Wander Lairson Costa wrote: > > > > On Tue, Mar 11, 2025 at 04:30:19PM -0400, John B. Wyatt IV wrote: > > > > > + "disable_idle_state": dict(dest='disable_idle_state', metavar='IDLE-STATE', type=int, help='Disable IDLE-STATE on the selected CPUs.'), > > > > > + "enable_idle_state": dict(dest='enable_idle_state', metavar='IDLE-STATE', type=int, help='Enable IDLE-STATE on the selected CPUs.') > > > > > + } > > > > > > > > > > parser = HelpMessageParser(description="tuna - Application Tuning Program") > > > > > > > > > > @@ -127,6 +132,9 @@ def gen_parser(): > > > > > > > > > > subparser = parser.add_subparsers(dest='command') > > > > > > > > > > + idle_set = subparser.add_parser('idle_set', > > > > > + description='Manage CPU idle state disabling (requires libcpupower and it\'s Python bindings)', > > > > > + help='Set all idle states on a given CPU-LIST.') > > s/it\'s/its/ > > > > > > > s/Set all idle states/Manage idle states/ > > > > > > I still don't like the "idle_set" name. This does more than setting, > > > unlike the standalone utility that we're apparently reimplementing. > > > > I agree that idle_set is not the ideal name since if you look at the > > cpupower tool, this is just one function, maybe cpupower would be a > > better name. It's a bit unfair to say we're reimplementing the tool. > > The functionality is in upstream kernel code and John Wyatt created > > python bindings for that, and that is what he is using here. > > But (as of now) this isn't using the python bindings to implement > something higher-level; it's exposing them as a command line tool that > has very similar functionality to the existing one. I'm not sure what > being written in Python has to do with it, other than a motive for > reimplementing, to make future features easier. People seem to be interested in testing rt with some power savings enabled, so we're trying to make it easier for people to do so with the tools they are used to using. I'm not really open to debating this. > > > I think it is better to have all the tools for manipulating the > > environment through one tool and not make the user go search for > > multiple tools to get the job done. > > "all the tools for manipulating the environment" is an extremely broad > mission... :-P :) how about all the tools for manipulating the environment that I think are important. > > So far it doesn't seem to align very well with the rest of what tuna > does, which is managing processes/threads... is there a plan to tie > them together so that you can have it set idle states based on what the > tasks running on a given cpu need, and/or move tasks around based on > that? > > > > Or just use "if cpu_list:" like the existing code does. > > > > > > That's also pretty obnoxious behavior of the code that sets > > > args.cpu_list... > > > > > > > > + @staticmethod > > > > > + def print_idle_info(cpu_list=[0]): > > > > > > This default is never used. cpu_list is only ever set to > > > self.__cpu_list. Why is this a static method? > > > > This is a different cpu_list than the instantiated one. > > This is which cpus the user wants the info from. I think > > the name is confusing here. > > The only call (so far...) is literally "self.print_idle_info(self.__cpu_list)". > > Where are you seeing a different cpu list? When (even with future All I meant is that self.__cpu_list is not the same as cpu_list. Since the names are confusingly similar, if a different variable really is needed, it would be best to rename the one local to the method / function. > patches) would the instantiated cpu list be anything other than what > the user asked for, and does that say anything about whether the class > design makes sense? > > > > > > + for cpu in cpu_list: > > > > > + idle_info = Cpupower.get_idle_info(cpu) > > > > > + print_str = f""" > > > > > +CPUidle driver: {idle_info["CPUidle-driver"]} > > > > > +CPUidle governor: {idle_info["CPUidle-governor"]} > > > > > +analyzing CPU {cpu} > > > > > + > > > > > +Number of idle states: {idle_info["idle-states-count"]} > > > > > +Available idle states: {idle_info["available-idle-states"]} > > > > > +""" > > > > > + for state in idle_info["cpu-states"]: > > > > > + print_str += f"""{state["Idle State Name"]} > > > > > +Flags/Description: {state["Flags/Description"]} > > > > > +Latency: {state["Latency"]} > > > > > +Usage: {state["Usage"]} > > > > > +Duration: {state["Duration"]} > > > > > +""" > > > > > + print( > > > > > + print_str > > > > > + ) > > > > > > > > nit: I don't think we need three lines for this statement. > > > > > > Why not just print each state inside the loop, instead of building one > > > big string? > > > > valid, but I think John Wyatt was thinking the string could be > > useful for other purposes. > > The json stuff I assume? Is this even in the right format for that? > Is there any plan to json-ize other tuna functionality? No, there is no play to json anything in tuna. Look, agreed, the string is messy, we'll fix it, I'm just saying he probably had something else in mind when he coded it. > > It's really hard to review patches when there are vague or unstated > notions of "this could be useful in the future"... especially when > recent experience with rteval has highlighted the importance of > avoiding overengineering. rteval doesn't have anything to do with this, the original coders were completely different people. However, rteval has stood the test of time. I don't like the multiple layers of inheritance, but sometimes I find suprising benefits to the original design. Anyway, you can discuss that with me separately, that doesn't have anything to do with this. What we are talking about here, is a single file that wraps the code in a class, this is fine, it often results in cleaner code even when object oriented features aren't used a lot. Your suggestions are fine, now, give him some time to implement them. John Kacur
diff --git a/tuna-cmd.py b/tuna-cmd.py index d0323f5..96a25a5 100755 --- a/tuna-cmd.py +++ b/tuna-cmd.py @@ -25,6 +25,7 @@ from tuna import tuna, sysfs, utils import logging import time import shutil +import tuna.cpupower as cpw def get_loglevel(level): if level.isdigit() and int(level) in range(0,5): @@ -115,8 +116,12 @@ def gen_parser(): "disable_perf": dict(action='store_true', help="Explicitly disable usage of perf in GUI for process view"), "refresh": dict(default=2500, metavar='MSEC', type=int, help="Refresh the GUI every MSEC milliseconds"), "priority": dict(default=(None, None), metavar="POLICY:RTPRIO", type=tuna.get_policy_and_rtprio, help="Set thread scheduler tunables: POLICY and RTPRIO"), - "background": dict(action='store_true', help="Run command as background task") - } + "background": dict(action='store_true', help="Run command as background task"), + "idle_info": dict(dest='idle_info', action='store_const', const=True, help='Print general idle information for the selected CPUs, including index values for IDLE-STATE.'), + "idle_state_disabled_status": dict(dest='idle_state_disabled_status', metavar='IDLE-STATE', type=int, help='Print whether IDLE-STATE is enabled on the selected CPUs.'), + "disable_idle_state": dict(dest='disable_idle_state', metavar='IDLE-STATE', type=int, help='Disable IDLE-STATE on the selected CPUs.'), + "enable_idle_state": dict(dest='enable_idle_state', metavar='IDLE-STATE', type=int, help='Enable IDLE-STATE on the selected CPUs.') + } parser = HelpMessageParser(description="tuna - Application Tuning Program") @@ -127,6 +132,9 @@ def gen_parser(): subparser = parser.add_subparsers(dest='command') + idle_set = subparser.add_parser('idle_set', + description='Manage CPU idle state disabling (requires libcpupower and it\'s Python bindings)', + help='Set all idle states on a given CPU-LIST.') isolate = subparser.add_parser('isolate', description="Move all allowed threads and IRQs away from CPU-LIST", help="Move all allowed threads and IRQs away from CPU-LIST") include = subparser.add_parser('include', description="Allow all threads to run on CPU-LIST", @@ -146,7 +154,6 @@ def gen_parser(): show_threads = subparser.add_parser('show_threads', description='Show thread list', help='Show thread list') show_irqs = subparser.add_parser('show_irqs', description='Show IRQ list', help='Show IRQ list') show_configs = subparser.add_parser('show_configs', description='List preloaded profiles', help='List preloaded profiles') - what_is = subparser.add_parser('what_is', description='Provides help about selected entities', help='Provides help about selected entities') gui = subparser.add_parser('gui', description="Start the GUI", help="Start the GUI") @@ -218,6 +225,13 @@ def gen_parser(): show_irqs_group.add_argument('-S', '--sockets', **MODS['sockets']) show_irqs.add_argument('-q', '--irqs', **MODS['irqs']) + idle_set_group = idle_set.add_mutually_exclusive_group(required=True) + idle_set_group.add_argument('-i', '--idle-info', **MODS['idle_info']) + idle_set_group.add_argument('-s', '--status', **MODS['idle_state_disabled_status']) + idle_set_group.add_argument('-d', '--disable', **MODS['disable_idle_state']) + idle_set_group.add_argument('-e', '--enable', **MODS['enable_idle_state']) + idle_set.add_argument('-c', '--cpus', **MODS['cpus']) + what_is.add_argument('thread_list', **POS['thread_list']) gui.add_argument('-d', '--disable_perf', **MODS['disable_perf']) @@ -647,6 +661,16 @@ def main(): print("Valid log levels: NOTSET, DEBUG, INFO, WARNING, ERROR") print("Log levels may be specified numerically (0-4)\n") + if args.command == 'idle_set': + if not cpw.have_cpupower: + print(f"Error: libcpupower bindings are not detected; please install libcpupower bindings from at least kernel {cpw.cpupower_required_kernel}.") + sys.exit(1) + + my_cpupower = cpw.Cpupower(args.cpu_list) + ret = my_cpupower.idle_set_handler(args) + if ret > 0: + sys.exit(ret) + if 'irq_list' in vars(args): ps = procfs.pidstats() if tuna.has_threaded_irqs(ps): diff --git a/tuna/cpupower.py b/tuna/cpupower.py new file mode 100755 index 0000000..7913027 --- /dev/null +++ b/tuna/cpupower.py @@ -0,0 +1,184 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) 2024 John B. Wyatt IV + +from typing import List +import tuna.utils as utils + +cpupower_required_kernel = "6.12" +have_cpupower = None + +try: + import raw_pylibcpupower as lcpw + lcpw.cpufreq_get_available_frequencies(0) + have_cpupower = True +except: + lcpw = None + have_cpupower = False + +if have_cpupower: + class Cpupower: + """The Cpupower class allows you to query and change the power states of the + cpu. + + You may query or change the cpus all at once or a list of the cpus provided to the constructor's cpulist argument. + + The bindings must be detected on the $PYTHONPATH variable. + + You must use have_cpupower variable to determine if the bindings were + detected in your code.""" + + LCPW_ERROR_TWO_CASE = 1 # enum for common error messages + LCPW_ERROR_THREE_CASE = 2 + + def __init__(self, cpu_list=None): + if cpu_list == None or cpu_list == []: # args.cpu_list == [] when the -c option is not used + self.__cpu_list = utils.get_all_cpu_list() + else: + self.__cpu_list = cpu_list + + @staticmethod + def handle_common_lcpw_errors(e, error_type, idle_name): + match e: + case 0: + pass + case -1: + print(f"Idlestate {idle_name} not available") + case -2: + print("Disabling is not supported by the kernel") + case -3: + if error_type == Cpupower.LCPW_ERROR_THREE_CASE: + print("No write access to disable/enable C-states: try using sudo") + else: + print(f"Not documented: {e}") + case _: + print(f"Not documented: {e}") + + @staticmethod + def get_idle_states(cpu): + """ + Get the c-states of a cpu. + + You can capture the return values with: + states_list, states_amt = get_idle_states() + + Returns + List[String]: list of cstates + Int: amt of cstates + """ + ret = [] + for cstate in range(lcpw.cpuidle_state_count(cpu)): + ret.append(lcpw.cpuidle_state_name(cpu,cstate)) + return ret, lcpw.cpuidle_state_count(cpu) + + @staticmethod + def get_idle_info(cpu=0): + idle_states, idle_states_amt = Cpupower.get_idle_states(cpu) + idle_states_list = [] + for idle_state in range(0, len(idle_states)): + idle_states_list.append( + { + "CPU ID": cpu, + "Idle State Name": idle_states[idle_state], + "Flags/Description": lcpw.cpuidle_state_desc(cpu, idle_state), + "Latency": lcpw.cpuidle_state_latency(cpu, idle_state), + "Usage": lcpw.cpuidle_state_usage(cpu, idle_state), + "Duration": lcpw.cpuidle_state_time(cpu, idle_state) + } + ) + idle_info = { + "CPUidle-driver": lcpw.cpuidle_get_driver(), + "CPUidle-governor": lcpw.cpuidle_get_governor(), + "idle-states-count": idle_states_amt, + "available-idle-states": idle_states, + "cpu-states": idle_states_list + } + return idle_info + + @staticmethod + def print_idle_info(cpu_list=[0]): + for cpu in cpu_list: + idle_info = Cpupower.get_idle_info(cpu) + print_str = f""" +CPUidle driver: {idle_info["CPUidle-driver"]} +CPUidle governor: {idle_info["CPUidle-governor"]} +analyzing CPU {cpu} + +Number of idle states: {idle_info["idle-states-count"]} +Available idle states: {idle_info["available-idle-states"]} +""" + for state in idle_info["cpu-states"]: + print_str += f"""{state["Idle State Name"]} +Flags/Description: {state["Flags/Description"]} +Latency: {state["Latency"]} +Usage: {state["Usage"]} +Duration: {state["Duration"]} +""" + print( + print_str + ) + + def idle_set_handler(self, args) -> int: + if args.idle_state_disabled_status != None: + cstate_index = args.idle_state_disabled_status + cstate_list, cstate_amt = self.get_idle_states(self.__cpu_list[0]) # Assuming all cpus have the same idle state + if cstate_index < 0 or cstate_index >= cstate_amt: + print(f"Invalid idle state range. Total for this cpu is {cstate_amt}") + return 1 + cstate_name = cstate_list[cstate_index] + ret = self.is_disabled_idle_state(cstate_index) + for i,e in enumerate(ret): + if e == 1: + print(f"CPU: {self.__cpu_list[i]} Idle state \"{cstate_name}\" is disabled.") + elif e == 0: + print(f"CPU: {self.__cpu_list[i]} Idle state \"{cstate_name}\" is enabled.") + else: + self.handle_common_lcpw_errors(e, self.LCPW_ERROR_TWO_CASE, cstate_name) + elif args.idle_info != None: + self.print_idle_info(self.__cpu_list) + return 0 + elif args.disable_idle_state != None: + cstate_index = args.disable_idle_state + cstate_list, cstate_amt = self.get_idle_states(self.__cpu_list[0]) # Assuming all cpus have the same idle state + if cstate_index < 0 or cstate_index >= cstate_amt: + print(f"Invalid idle state range. Total for this cpu is {cstate_amt}") + return 1 + cstate_name = cstate_list[cstate_index] + ret = self.disable_idle_state(cstate_index, 1) + for e in ret: + self.handle_common_lcpw_errors(e, self.LCPW_ERROR_THREE_CASE, cstate_name) + elif args.enable_idle_state != None: + cstate_index = args.enable_idle_state + cstate_list, cstate_amt = self.get_idle_states(self.__cpu_list[0]) # Assuming all cpus have the same idle state + if cstate_index < 0 or cstate_index >= cstate_amt: + print(f"Invalid idle state range. Total for this cpu is {cstate_amt}") + return 1 + cstate_name = cstate_list[cstate_index] + ret = self.disable_idle_state(cstate_index, 0) + for e in ret: + self.handle_common_lcpw_errors(e, self.LCPW_ERROR_THREE_CASE, cstate_name) + return 0 + + def disable_idle_state(self, state, disabled) -> List[int]: + """ + Disable or enable an idle state using the object's stored list of cpus. + + Args: + state (int): The cpu idle state index to disable or enable as an int starting from 0. + disabled (int): set to 1 to disable or 0 to enable. Less than 0 is an error. + """ + ret = [] + for cpu in self.__cpu_list: + ret.append(lcpw.cpuidle_state_disable(cpu, state, disabled)) + return ret + + def is_disabled_idle_state(self, state) -> List[int]: + """ + Query the idle state. + + Args: + state: The cpu idle state. 1 is disabled, 0 is enabled. Less than 0 is an error. + """ + ret = [] + for cpu in self.__cpu_list: + ret.append(lcpw.cpuidle_is_state_disabled(cpu, state)) + return ret