From a378b7e4f47375458651c0972e7cd813f6fe0a6b Mon Sep 17 00:00:00 2001 From: James Falcon Date: Wed, 26 Apr 2023 15:11:55 -0500 Subject: [PATCH] Make user/vendor data sensitive and remove log permissions (#2144) Because user data and vendor data may contain sensitive information, this commit ensures that any user data or vendor data written to instance-data.json gets redacted and is only available to root user. Also, modify the permissions of cloud-init.log to be 640, so that sensitive data leaked to the log isn't world readable. Additionally, remove the logging of user data and vendor data to cloud-init.log from the Vultr datasource. LP: #2013967 CVE: CVE-2023-1786 --- cloudinit/sources/DataSourceLXD.py | 11 +++++- cloudinit/sources/DataSourceVultr.py | 14 +++---- cloudinit/sources/__init__.py | 35 ++++++++++++++--- cloudinit/sources/tests/test_init.py | 58 ++++++++++++++++++++++++---- cloudinit/stages.py | 4 +- cloudinit/tests/test_stages.py | 18 +++++---- 6 files changed, 109 insertions(+), 31 deletions(-) diff --git a/cloudinit/sources/DataSourceLXD.py b/cloudinit/sources/DataSourceLXD.py index 732b32f..1e1e9e2 100644 --- a/cloudinit/sources/DataSourceLXD.py +++ b/cloudinit/sources/DataSourceLXD.py @@ -14,6 +14,7 @@ import os import requests from requests.adapters import HTTPAdapter +from typing import Tuple # pylint fails to import the two modules below. # These are imported via requests.packages rather than urllib3 because: @@ -173,8 +174,14 @@ class DataSourceLXD(sources.DataSource): _network_config = sources.UNSET _crawled_metadata = sources.UNSET - sensitive_metadata_keys = ( - 'merged_cfg', 'user.meta-data', 'user.vendor-data', 'user.user-data', + sensitive_metadata_keys: Tuple[ + str, ... + ] = sources.DataSource.sensitive_metadata_keys + ( + "user.meta-data", + "user.vendor-data", + "user.user-data", + "cloud-init.user-data", + "cloud-init.vendor-data", ) def _is_platform_viable(self) -> bool: diff --git a/cloudinit/sources/DataSourceVultr.py b/cloudinit/sources/DataSourceVultr.py index 68e1ff0..4d41d4e 100644 --- a/cloudinit/sources/DataSourceVultr.py +++ b/cloudinit/sources/DataSourceVultr.py @@ -10,6 +10,8 @@ from cloudinit import sources from cloudinit import util from cloudinit import version +from typing import Tuple + import cloudinit.sources.helpers.vultr as vultr LOG = log.getLogger(__name__) @@ -29,6 +31,10 @@ class DataSourceVultr(sources.DataSource): dsname = 'Vultr' + sensitive_metadata_keys: Tuple[ + str, ... + ] = sources.DataSource.sensitive_metadata_keys + ("startup-script",) + def __init__(self, sys_cfg, distro, paths): super(DataSourceVultr, self).__init__(sys_cfg, distro, paths) self.ds_cfg = util.mergemanydict([ @@ -54,13 +60,8 @@ class DataSourceVultr(sources.DataSource): self.get_datasource_data(self.metadata) # Dump some data so diagnosing failures is manageable - LOG.debug("Vultr Vendor Config:") - LOG.debug(util.json_dumps(self.metadata['vendor-data'])) LOG.debug("SUBID: %s", self.metadata['instance-id']) LOG.debug("Hostname: %s", self.metadata['local-hostname']) - if self.userdata_raw is not None: - LOG.debug("User-Data:") - LOG.debug(self.userdata_raw) return True @@ -141,7 +142,4 @@ if __name__ == "__main__": config = md['vendor-data'] sysinfo = vultr.get_sysinfo() - print(util.json_dumps(sysinfo)) - print(util.json_dumps(config)) - # vi: ts=4 expandtab diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py index f2f2343..20cc397 100644 --- a/cloudinit/sources/__init__.py +++ b/cloudinit/sources/__init__.py @@ -13,7 +13,7 @@ import copy import json import os from collections import namedtuple -from typing import Dict, List # noqa: F401 +from typing import Dict, List, Tuple # noqa: F401 from cloudinit import dmi from cloudinit import importer @@ -103,7 +103,10 @@ def process_instance_metadata(metadata, key_path='', sensitive_keys=()): sub_key_path = key_path + '/' + key else: sub_key_path = key - if key in sensitive_keys or sub_key_path in sensitive_keys: + if ( + key.lower() in sensitive_keys + or sub_key_path.lower() in sensitive_keys + ): sens_keys.append(sub_key_path) if isinstance(val, str) and val.startswith('ci-b64:'): base64_encoded_keys.append(sub_key_path) @@ -124,6 +127,12 @@ def redact_sensitive_keys(metadata, redact_value=REDACT_SENSITIVE_VALUE): Replace any keys values listed in 'sensitive_keys' with redact_value. """ + # While 'sensitive_keys' should already sanitized to only include what + # is in metadata, it is possible keys will overlap. For example, if + # "merged_cfg" and "merged_cfg/ds/userdata" both match, it's possible that + # "merged_cfg" will get replaced first, meaning "merged_cfg/ds/userdata" + # no longer represents a valid key. + # Thus, we still need to do membership checks in this function. if not metadata.get('sensitive_keys', []): return metadata md_copy = copy.deepcopy(metadata) @@ -131,9 +140,14 @@ def redact_sensitive_keys(metadata, redact_value=REDACT_SENSITIVE_VALUE): path_parts = key_path.split('/') obj = md_copy for path in path_parts: - if isinstance(obj[path], dict) and path != path_parts[-1]: + if ( + path in obj + and isinstance(obj[path], dict) + and path != path_parts[-1] + ): obj = obj[path] - obj[path] = redact_value + if path in obj: + obj[path] = redact_value return md_copy @@ -215,7 +229,18 @@ class DataSource(CloudInitPickleMixin, metaclass=abc.ABCMeta): # N-tuple of keypaths or keynames redact from instance-data.json for # non-root users - sensitive_metadata_keys = ('merged_cfg', 'security-credentials',) + sensitive_metadata_keys: Tuple[str, ...] = ( + "merged_cfg", + "security-credentials", + "userdata", + "user-data", + "user_data", + "vendordata", + "vendor-data", + # Provide ds/vendor_data to avoid redacting top-level + # "vendor_data": {enabled: True} + "ds/vendor_data", + ) _ci_pkl_version = 1 diff --git a/cloudinit/sources/tests/test_init.py b/cloudinit/sources/tests/test_init.py index ae09cb1..bce2ab5 100644 --- a/cloudinit/sources/tests/test_init.py +++ b/cloudinit/sources/tests/test_init.py @@ -353,11 +353,32 @@ class TestDataSource(CiTestCase): 'availability_zone': 'myaz', 'local-hostname': 'test-subclass-hostname', 'region': 'myregion', - 'some': {'security-credentials': { - 'cred1': 'sekret', 'cred2': 'othersekret'}}}) + 'some': { + 'security-credentials': { + 'cred1': 'sekret', 'cred2': 'othersekret' + } + }, + "someother": { + "nested": { + "userData": "HIDE ME", + } + }, + "VENDOR-DAta": "HIDE ME TOO", + }, + ) self.assertCountEqual( - ('merged_cfg', 'security-credentials',), - datasource.sensitive_metadata_keys) + ( + "merged_cfg", + "security-credentials", + "userdata", + "user-data", + "user_data", + "vendordata", + "vendor-data", + "ds/vendor_data", + ), + datasource.sensitive_metadata_keys, + ) sys_info = { "python": "3.7", "platform": @@ -373,7 +394,11 @@ class TestDataSource(CiTestCase): 'base64_encoded_keys': [], 'merged_cfg': REDACT_SENSITIVE_VALUE, 'sensitive_keys': [ - 'ds/meta_data/some/security-credentials', 'merged_cfg'], + "ds/meta_data/VENDOR-DAta", + "ds/meta_data/some/security-credentials", + "ds/meta_data/someother/nested/userData", + "merged_cfg", + ], 'sys_info': sys_info, 'v1': { '_beta_keys': ['subplatform'], @@ -381,6 +406,7 @@ class TestDataSource(CiTestCase): 'availability_zone': 'myaz', 'cloud-name': 'subclasscloudname', 'cloud_name': 'subclasscloudname', + "cloud_id": "subclasscloudname", 'distro': 'ubuntu', 'distro_release': 'focal', 'distro_version': '20.04', @@ -401,10 +427,16 @@ class TestDataSource(CiTestCase): 'ds': { '_doc': EXPERIMENTAL_TEXT, 'meta_data': { + "VENDOR-DAta": REDACT_SENSITIVE_VALUE, 'availability_zone': 'myaz', 'local-hostname': 'test-subclass-hostname', 'region': 'myregion', - 'some': {'security-credentials': REDACT_SENSITIVE_VALUE}}} + 'some': {'security-credentials': REDACT_SENSITIVE_VALUE}, + "someother": { + "nested": {"userData": REDACT_SENSITIVE_VALUE} + }, + }, + }, } self.assertCountEqual(expected, redacted) file_stat = os.stat(json_file) @@ -432,8 +464,18 @@ class TestDataSource(CiTestCase): "variant": "ubuntu", "dist": ["ubuntu", "20.04", "focal"]} self.assertCountEqual( - ('merged_cfg', 'security-credentials',), - datasource.sensitive_metadata_keys) + ( + "merged_cfg", + "security-credentials", + "userdata", + "user-data", + "user_data", + "vendordata", + "vendor-data", + "ds/vendor_data", + ), + datasource.sensitive_metadata_keys, + ) with mock.patch("cloudinit.util.system_info", return_value=sys_info): datasource.get_data() sensitive_json_file = self.tmp_path(INSTANCE_JSON_SENSITIVE_FILE, tmp) diff --git a/cloudinit/stages.py b/cloudinit/stages.py index 731b298..59b0925 100644 --- a/cloudinit/stages.py +++ b/cloudinit/stages.py @@ -204,7 +204,9 @@ class Init(object): util.ensure_dirs(self._initial_subdirs()) log_file = util.get_cfg_option_str(self.cfg, 'def_log_file') if log_file: - util.ensure_file(log_file, mode=0o640, preserve_mode=True) + # At this point the log file should have already been created + # in the setupLogging function of log.py + util.ensure_file(log_file, mode=0o640, preserve_mode=False) perms = self.cfg.get('syslog_fix_perms') if not perms: perms = {} diff --git a/cloudinit/tests/test_stages.py b/cloudinit/tests/test_stages.py index a50836a..aeab17a 100644 --- a/cloudinit/tests/test_stages.py +++ b/cloudinit/tests/test_stages.py @@ -458,21 +458,25 @@ class TestInit_InitializeFilesystem: # Assert we create it 0o640 by default if it doesn't already exist assert 0o640 == stat.S_IMODE(log_file.stat().mode) - def test_existing_file_permissions_are_not_modified(self, init, tmpdir): - """If the log file already exists, we should not modify its permissions + def test_existing_file_permissions(self, init, tmpdir): + """Test file permissions are set as expected. + + CIS Hardening requires 640 permissions. These permissions are + currently hardcoded on every boot, but if there's ever a reason + to change this, we need to then ensure that they + are *not* set every boot. See https://bugs.launchpad.net/cloud-init/+bug/1900837. """ - # Use a mode that will never be made the default so this test will - # always be valid - mode = 0o606 log_file = tmpdir.join("cloud-init.log") log_file.ensure() - log_file.chmod(mode) + # Use a mode that will never be made the default so this test will + # always be valid + log_file.chmod(0o606) init._cfg = {"def_log_file": str(log_file)} init._initialize_filesystem() - assert mode == stat.S_IMODE(log_file.stat().mode) + assert 0o640 == stat.S_IMODE(log_file.stat().mode) # vi: ts=4 expandtab -- 2.33.0