From 9436569e2accb88b4ac3898c665d860697472f89 Mon Sep 17 00:00:00 2001 From: Alec Warren <37227576+a-dubs@users.noreply.github.com> Date: Mon, 21 Aug 2023 18:40:11 -0400 Subject: [PATCH] collect-logs fix memory usage (SC-1590) (#4289) Reference:https://github.com/canonical/cloud-init/commit/9436569e2accb88b4ac3898c665d860697472f89 Conflict:(1)change "output = subp(cmd).stdout" to "output = subp(cmd)[0]" (2)add "import pytest" in test_logs.py (3)format diffs. collect-logs now streams certain outputs to avoid large memory usage _stream_command_output_to_file() was added to mimic functionality of _write_command_output_to_file() except instead of reading the output of subprocess calls into memory, it streams the outputs directly to the target file. This new function is used when the output of a subprocess call does not need to be saved into a variable. As far as usage goes, the main difference between the two functions is that the stream function does not return result of the subprocess call, while the write function does. Fixes GH-3994 LP: #1980150 --- cloudinit/cmd/devel/logs.py | 57 +++++++++++----- cloudinit/cmd/devel/tests/test_logs.py | 93 +++++++++++++++++++++++++- 2 files changed, 132 insertions(+), 18 deletions(-) diff --git a/cloudinit/cmd/devel/logs.py b/cloudinit/cmd/devel/logs.py index 84c157d..ef52cd0 100644 --- a/cloudinit/cmd/devel/logs.py +++ b/cloudinit/cmd/devel/logs.py @@ -8,6 +8,7 @@ import argparse from datetime import datetime import os import shutil +import subprocess import sys from cloudinit.cmd.devel import read_cfg_paths @@ -67,15 +68,29 @@ def _copytree_rundir_ignore_files(curdir, files): def _write_command_output_to_file(cmd, filename, msg, verbosity): """Helper which runs a command and writes output or error to filename.""" + ensure_dir(os.path.dirname(filename)) try: - out, _ = subp(cmd) + output = subp(cmd)[0] except ProcessExecutionError as e: write_file(filename, str(e)) _debug("collecting %s failed.\n" % msg, 1, verbosity) else: - write_file(filename, out) + write_file(filename, output) + _debug("collected %s\n" % msg, 1, verbosity) + return output + + +def _stream_command_output_to_file(cmd, filename, msg, verbosity): + """Helper which runs a command and writes output or error to filename.""" + ensure_dir(os.path.dirname(filename)) + try: + with open(filename, "w") as f: + subprocess.call(cmd, stdout=f, stderr=f) + except OSError as e: + write_file(filename, str(e)) + _debug("collecting %s failed.\n" % msg, 1, verbosity) + else: _debug("collected %s\n" % msg, 1, verbosity) - return out def _debug(msg, level, verbosity): @@ -108,23 +123,33 @@ def collect_logs(tarfile, include_userdata: bool, verbosity=0): with tempdir(dir='/tmp') as tmp_dir: log_dir = os.path.join(tmp_dir, log_dir) version = _write_command_output_to_file( - ['cloud-init', '--version'], - os.path.join(log_dir, 'version'), - "cloud-init --version", verbosity) + cmd=["cloud-init", "--version"], + filename=os.path.join(log_dir, "version"), + msg="cloud-init --version", + verbosity=verbosity, + ) dpkg_ver = _write_command_output_to_file( - ['dpkg-query', '--show', "-f=${Version}\n", 'cloud-init'], - os.path.join(log_dir, 'dpkg-version'), - "dpkg version", verbosity) + cmd=["dpkg-query", "--show", "-f=${Version}\n", "cloud-init"], + filename=os.path.join(log_dir, "dpkg-version"), + msg="dpkg version", + verbosity=verbosity, + ) if not version: version = dpkg_ver if dpkg_ver else "not-available" + print("version: ", version) _debug("collected cloud-init version: %s\n" % version, 1, verbosity) - _write_command_output_to_file( - ['dmesg'], os.path.join(log_dir, 'dmesg.txt'), - "dmesg output", verbosity) - _write_command_output_to_file( - ['journalctl', '--boot=0', '-o', 'short-precise'], - os.path.join(log_dir, 'journal.txt'), - "systemd journal of current boot", verbosity) + _stream_command_output_to_file( + cmd=["dmesg"], + filename=os.path.join(log_dir, "dmesg.txt"), + msg="dmesg output", + verbosity=verbosity, + ) + _stream_command_output_to_file( + cmd=["journalctl", "--boot=0", "-o", "short-precise"], + filename=os.path.join(log_dir, "journal.txt"), + msg="systemd journal of current boot", + verbosity=verbosity, + ) for log in CLOUDINIT_LOGS: _collect_file(log, log_dir, verbosity) diff --git a/cloudinit/cmd/devel/tests/test_logs.py b/cloudinit/cmd/devel/tests/test_logs.py index 4ef6d5d..ed10b50 100644 --- a/cloudinit/cmd/devel/tests/test_logs.py +++ b/cloudinit/cmd/devel/tests/test_logs.py @@ -1,6 +1,7 @@ # This file is part of cloud-init. See LICENSE file for license information. from datetime import datetime +import pytest import os import re from io import StringIO @@ -8,6 +9,7 @@ from io import StringIO from cloudinit.cmd.devel import logs from cloudinit.sources import INSTANCE_JSON_SENSITIVE_FILE from cloudinit.subp import subp +from cloudinit.subp import SubpResult, subp from cloudinit.util import load_file, write_file from cloudinit.tests.helpers import mock @@ -63,10 +65,25 @@ class TestCollectLogs: 'Unexpected command provided to subp: {0}'.format(cmd)) if cmd == ['tar', 'czvf', output_tarfile, date_logdir]: subp(cmd) # Pass through tar cmd so we can check output - return expected_subp[cmd_tuple], '' + return SubpResult(expected_subp[cmd_tuple], "") + + # the new _stream_command_output_to_file function uses subprocess.call + # instead of subp, so we need to mock that as well + def fake_subprocess_call(cmd, stdout=None, stderr=None): + cmd_tuple = tuple(cmd) + if cmd_tuple not in expected_subp: + raise AssertionError( + "Unexpected command provided to subprocess: {0}".format( + cmd + ) + ) + stdout.write(expected_subp[cmd_tuple]) fake_stderr = mock.MagicMock() mocker.patch(M_PATH + "subp", side_effect=fake_subp) + mocker.patch( + M_PATH + "subprocess.call", side_effect=fake_subprocess_call + ) mocker.patch(M_PATH + "sys.stderr", fake_stderr) mocker.patch(M_PATH + "CLOUDINIT_LOGS", [log1, log2]) mocker.patch(M_PATH + "CLOUDINIT_RUN_DIR", run_dir) @@ -142,7 +159,7 @@ class TestCollectLogs: 'Unexpected command provided to subp: {0}'.format(cmd)) if cmd == ['tar', 'czvf', output_tarfile, date_logdir]: subp(cmd) # Pass through tar cmd so we can check output - return expected_subp[cmd_tuple], '' + return SubpResult(expected_subp[cmd_tuple], "") fake_stderr = mock.MagicMock() @@ -167,3 +184,75 @@ class TestCollectLogs: ) ) fake_stderr.write.assert_any_call('Wrote %s\n' % output_tarfile) + + @pytest.mark.parametrize( + "cmd, expected_file_contents, expected_return_value", + [ + ( + ["echo", "cloud-init? more like cloud-innit!"], + "cloud-init? more like cloud-innit!\n", + "cloud-init? more like cloud-innit!\n", + ), + ( + ["ls", "/nonexistent-directory"], + ( + "Unexpected error while running command.\n" + "Command: ['ls', '/nonexistent-directory']\n" + "Exit code: 2\n" + "Reason: -\n" + "Stdout: \n" + "Stderr: ls: cannot access '/nonexistent-directory': " + "No such file or directory" + ), + None, + ), + ], + ) + def test_write_command_output_to_file( + self, + m_getuid, + tmpdir, + cmd, + expected_file_contents, + expected_return_value, + ): + m_getuid.return_value = 100 + output_file = tmpdir.join("test-output-file.txt") + + return_output = logs._write_command_output_to_file( + filename=output_file, + cmd=cmd, + msg="", + verbosity=1, + ) + + assert expected_return_value == return_output + assert expected_file_contents == load_file(output_file) + + @pytest.mark.parametrize( + "cmd, expected_file_contents", + [ + (["echo", "cloud-init, shmoud-init"], "cloud-init, shmoud-init\n"), + ( + ["ls", "/nonexistent-directory"], + ( + "ls: cannot access '/nonexistent-directory': " + "No such file or directory\n" + ), + ), + ], + ) + def test_stream_command_output_to_file( + self, m_getuid, tmpdir, cmd, expected_file_contents + ): + m_getuid.return_value = 100 + output_file = tmpdir.join("test-output-file.txt") + + logs._stream_command_output_to_file( + filename=output_file, + cmd=cmd, + msg="", + verbosity=1, + ) + + assert expected_file_contents == load_file(output_file) -- 2.33.0