Message ID | 20250228175441.674384-1-alex.bennee@linaro.org |
---|---|
State | New |
Headers | show |
Series | [RFC] gitlab: add a new build_unit job to track build size | expand |
Hi Alex, On 2/28/25 09:54, Alex Bennée wrote: > We want to reduce the total number of build units in the system to get > on our way to a single binary. It will help to have some numbers so > lets add a job to gitlab to track our progress. > That's a good idea! > Signed-off-by: Alex Bennée <alex.bennee@linaro.org> > Cc: Pierrick Bouvier <pierrick.bouvier@linaro.org> > Cc: Philippe Mathieu-Daudé <philmd@linaro.org> > Cc: Richard Henderson <richard.henderson@linaro.org> > --- > .gitlab-ci.d/check-units.py | 95 ++++++++++++++++++++++++++++++++++ > .gitlab-ci.d/static_checks.yml | 22 ++++++++ > 2 files changed, 117 insertions(+) > create mode 100755 .gitlab-ci.d/check-units.py > > diff --git a/.gitlab-ci.d/check-units.py b/.gitlab-ci.d/check-units.py > new file mode 100755 > index 0000000000..aca63bd481 > --- /dev/null > +++ b/.gitlab-ci.d/check-units.py > @@ -0,0 +1,95 @@ > +#!/usr/bin/env python3 > +# > +# check-units.py: check the number of compilation units and identify > +# those that are rebuilt multiple times > +# > +# Copyright (C) 2025 Linaro Ltd. > +# > +# SPDX-License-Identifier: GPL-2.0-or-later > + > +from os import access, R_OK, path > +from subprocess import check_output, CalledProcessError > +from sys import argv, exit > +import re > + > + > +def extract_build_units(cc_path): > + """ > + Extract the build units and their counds from compile_commands.json file. > + > + Returns: > + Hash table of ["unit"] = count > + """ > + > + # Make jq/shell do the heavy lifting > + cmd = f"jq < {cc_path} '.[] | .file' | sort | uniq -c | sort -rn" > + If we choose to have a dedicated python script, maybe we can simply: import json from collections import Counter j = json.load(open('build/compile_commands.json', 'r')) files = [f['file'] for f in j] occurences = Counter(files) It's just a suggestion, and the script is fine as it is as well. 
> + try: > + # Execute the shell command and capture the output > + result = check_output(cmd, shell=True) > + except CalledProcessError as exp: > + print(f"Error executing {cmd}: {exp}") > + exit(1) > + > + lines = result.decode().strip().split('\n') > + > + # Create a dictionary to store the build unit frequencies > + build_units = {} > + > + # extract from string of form: ' 65 "../../fpu/softfloat.c"' > + ext_pat = re.compile(r'^\s*(\d+)\s+"([^"]+)"') > + > + # strip leading ../ > + norm_pat = re.compile(r'^((\.\./)+|/+)') > + > + # Process each line of the output > + for line in lines: > + match = re.match(ext_pat, line) > + if match: > + count = int(match.group(1)) > + unit_path = re.sub(norm_pat, '', match.group(2)) > + > + # Store the count in the dictionary > + build_units[unit_path] = count > + else: > + print(f"couldn't process {line}") > + > + return build_units > + > + > +def analyse_units(build_units): > + """ > + Analyse the build units and report stats and the top 10 rebuilds > + """ > + > + print(f"Total source files: {len(build_units.keys())}") > + print(f"Total build units: {sum(units.values())}") > + > + # Create a sorted list by number of rebuilds > + sorted_build_units = sorted(build_units.items(), > + key=lambda item: item[1], > + reverse=True) > + > + print("Most rebuilt units:") > + for unit, count in sorted_build_units[:10]: > + print(f" {unit} built {count} times") > + > + print("Least rebuilt units:") > + for unit, count in sorted_build_units[-10:]: > + print(f" {unit} built {count} times") > + > + > +if __name__ == "__main__": > + if len(argv) != 2: > + script_name = path.basename(argv[0]) > + print(f"Usage: {script_name} <path_to_compile_commands.json>") > + exit(1) > + > + cc_path = argv[1] > + if path.isfile(cc_path) and access(cc_path, R_OK): > + units = extract_build_units(cc_path) > + analyse_units(units) > + exit(0) > + else: > + print(f"{cc_path} doesn't exist or isn't readable") > + exit(1) > diff --git 
a/.gitlab-ci.d/static_checks.yml b/.gitlab-ci.d/static_checks.yml > index c0ba453382..c3ed6de453 100644 > --- a/.gitlab-ci.d/static_checks.yml > +++ b/.gitlab-ci.d/static_checks.yml > @@ -70,3 +70,25 @@ check-rust-tools-nightly: > expire_in: 2 days > paths: > - rust/target/doc > + > +check-build-units: > + extends: .base_job_template > + stage: build > + image: $CI_REGISTRY_IMAGE/qemu/debian:$QEMU_CI_CONTAINER_TAG > + needs: > + job: amd64-debian-container > + before_script: > + - source scripts/ci/gitlab-ci-section > + - section_start setup "Install Tools" > + - apt install --assume-yes --no-install-recommends jq > + - section_end setup > + script: > + - mkdir build > + - cd build > + - section_start configure "Running configure" > + - ../configure > + - cd .. > + - section_end configure > + - section_start analyse "Analyse" > + - .gitlab-ci.d/check-units.py build/compile_commands.json > + - section_end analyse Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
diff --git a/.gitlab-ci.d/check-units.py b/.gitlab-ci.d/check-units.py
new file mode 100755
index 0000000000..aca63bd481
--- /dev/null
+++ b/.gitlab-ci.d/check-units.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python3
+#
+# check-units.py: check the number of compilation units and identify
+#                 those that are rebuilt multiple times
+#
+# Copyright (C) 2025 Linaro Ltd.
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+from os import access, R_OK, path
+from subprocess import check_output, CalledProcessError
+from sys import argv, exit
+import re
+
+
+def extract_build_units(cc_path):
+    """
+    Extract the build units and their counts from compile_commands.json file.
+
+    Returns:
+        Hash table of ["unit"] = count
+    """
+
+    # Make jq/shell do the heavy lifting
+    cmd = f"jq < {cc_path} '.[] | .file' | sort | uniq -c | sort -rn"
+
+    try:
+        # Execute the shell command and capture the output
+        result = check_output(cmd, shell=True)
+    except CalledProcessError as exp:
+        print(f"Error executing {cmd}: {exp}")
+        exit(1)
+
+    lines = result.decode().strip().split('\n')
+
+    # Create a dictionary to store the build unit frequencies
+    build_units = {}
+
+    # extract from string of form: ' 65 "../../fpu/softfloat.c"'
+    ext_pat = re.compile(r'^\s*(\d+)\s+"([^"]+)"')
+
+    # strip leading ../
+    norm_pat = re.compile(r'^((\.\./)+|/+)')
+
+    # Process each line of the output
+    for line in lines:
+        match = re.match(ext_pat, line)
+        if match:
+            count = int(match.group(1))
+            unit_path = re.sub(norm_pat, '', match.group(2))
+
+            # Store the count in the dictionary
+            build_units[unit_path] = count
+        else:
+            print(f"couldn't process {line}")
+
+    return build_units
+
+
+def analyse_units(build_units):
+    """
+    Report totals plus the ten most and ten least rebuilt build units
+    """
+
+    print(f"Total source files: {len(build_units.keys())}")
+    print(f"Total build units: {sum(build_units.values())}")
+
+    # Create a sorted list by number of rebuilds
+    sorted_build_units = sorted(build_units.items(),
+                                key=lambda item: item[1],
+                                reverse=True)
+
+    print("Most rebuilt units:")
+    for unit, count in sorted_build_units[:10]:
+        print(f" {unit} built {count} times")
+
+    print("Least rebuilt units:")
+    for unit, count in sorted_build_units[-10:]:
+        print(f" {unit} built {count} times")
+
+
+if __name__ == "__main__":
+    if len(argv) != 2:
+        script_name = path.basename(argv[0])
+        print(f"Usage: {script_name} <path_to_compile_commands.json>")
+        exit(1)
+
+    cc_path = argv[1]
+    if path.isfile(cc_path) and access(cc_path, R_OK):
+        units = extract_build_units(cc_path)
+        analyse_units(units)
+        exit(0)
+    else:
+        print(f"{cc_path} doesn't exist or isn't readable")
+        exit(1)
diff --git a/.gitlab-ci.d/static_checks.yml b/.gitlab-ci.d/static_checks.yml
index c0ba453382..c3ed6de453 100644
--- a/.gitlab-ci.d/static_checks.yml
+++ b/.gitlab-ci.d/static_checks.yml
@@ -70,3 +70,25 @@ check-rust-tools-nightly:
     expire_in: 2 days
     paths:
       - rust/target/doc
+
+check-build-units:
+  extends: .base_job_template
+  stage: build
+  image: $CI_REGISTRY_IMAGE/qemu/debian:$QEMU_CI_CONTAINER_TAG
+  needs:
+    job: amd64-debian-container
+  before_script:
+    - source scripts/ci/gitlab-ci-section
+    - section_start setup "Install Tools"
+    - apt install --assume-yes --no-install-recommends jq
+    - section_end setup
+  script:
+    - mkdir build
+    - cd build
+    - section_start configure "Running configure"
+    - ../configure
+    - cd ..
+    - section_end configure
+    - section_start analyse "Analyse"
+    - .gitlab-ci.d/check-units.py build/compile_commands.json
+    - section_end analyse
We want to reduce the total number of build units in the system to get on our way to a single binary. It will help to have some numbers so let's add a job to gitlab to track our progress. Signed-off-by: Alex Bennée <alex.bennee@linaro.org> Cc: Pierrick Bouvier <pierrick.bouvier@linaro.org> Cc: Philippe Mathieu-Daudé <philmd@linaro.org> Cc: Richard Henderson <richard.henderson@linaro.org> --- .gitlab-ci.d/check-units.py | 95 ++++++++++++++++++++++++++++++++++ .gitlab-ci.d/static_checks.yml | 22 ++++++++ 2 files changed, 117 insertions(+) create mode 100755 .gitlab-ci.d/check-units.py