From 03a14f5284b80dea659d2003cf8404f569f7d344 Mon Sep 17 00:00:00 2001
From: David Drysdale
Date: Thu, 14 Nov 2024 16:37:58 +0000
Subject: [PATCH 1/5] Declare support for v4 of KeyMint HAL

Bug: 377744414
Bug: 369375199
Test: vts_treble_vintf_vendor_test
Change-Id: I1fc04834819cc9291d89a6ee6803cbc5d5d91437
---
 .../android.hardware.security.keymint-service.rust.trusty.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/trusty/keymint/android.hardware.security.keymint-service.rust.trusty.xml b/trusty/keymint/android.hardware.security.keymint-service.rust.trusty.xml
index 3dc9c88ea..f74d21285 100644
--- a/trusty/keymint/android.hardware.security.keymint-service.rust.trusty.xml
+++ b/trusty/keymint/android.hardware.security.keymint-service.rust.trusty.xml
@@ -1,7 +1,7 @@
 <manifest version="1.0" type="device">
     <hal format="aidl">
         <name>android.hardware.security.keymint</name>
-        <version>3</version>
+        <version>4</version>
         <fqname>IKeyMintDevice/default</fqname>
     </hal>
 </manifest>

From 6105d9dc8aeec5ef54092854121cd6ff5859f3a4 Mon Sep 17 00:00:00 2001
From: Gabriel Biren
Date: Mon, 18 Nov 2024 20:24:58 +0000
Subject: [PATCH 2/5] Create the mainline supplicant directory during
 initialization.

Bug: 365585450
Test: Manual test - verify that the mainline supplicant can add/remove interfaces
Change-Id: Ib41b361a8b032c04586f108be9d1933214934286
---
 rootdir/init.rc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/rootdir/init.rc b/rootdir/init.rc
index 617e60a0f..eeafd4c85 100644
--- a/rootdir/init.rc
+++ b/rootdir/init.rc
@@ -781,6 +781,8 @@ on post-fs-data
     mkdir /data/misc/shared_relro 0771 shared_relro shared_relro
     mkdir /data/misc/systemkeys 0700 system system
     mkdir /data/misc/wifi 0770 wifi wifi
+    mkdir /data/misc/wifi/mainline_supplicant 0770 wifi wifi
+    mkdir /data/misc/wifi/mainline_supplicant/sockets 0770 wifi wifi
     mkdir /data/misc/wifi/sockets 0770 wifi wifi
     mkdir /data/misc/wifi/wpa_supplicant 0770 wifi wifi
     mkdir /data/misc/ethernet 0770 system system

From 6028880ac00210661da0c9fa6a10324e1bfb41d3 Mon Sep 17 00:00:00 2001
From: Akilesh Kailash <akailash@google.com>
Date: Tue, 12 Nov 2024 10:16:40 -0800
Subject: [PATCH 3/5] Move snapuserd_test to postsubmit

Bug: 338329603
Test: Build
Change-Id: Ibee6be732078ea7e11fe7348772837b9b2699fbf
Signed-off-by: Akilesh Kailash <akailash@google.com>
---
 fs_mgr/TEST_MAPPING | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs_mgr/TEST_MAPPING b/fs_mgr/TEST_MAPPING
index 192232d6c..13af1e2a3 100644
--- a/fs_mgr/TEST_MAPPING
+++ b/fs_mgr/TEST_MAPPING
@@ -27,7 +27,9 @@
     },
     {
       "name": "cow_api_test"
-    },
+    }
+  ],
+  "postsubmit": [
     {
       "name": "snapuserd_test"
     }

From fdaaef95238d1de873040482938585cb01f724d6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?=
Date: Tue, 19 Nov 2024 19:24:54 +0000
Subject: [PATCH 4/5] Revert "libprocessgroup: Remove __BEGIN_DECLS and
 __END_DECLS"

This reverts commit fdf4432356ddb597f46cfb7b047ab4e0bb808ba2.
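
(For context: __BEGIN_DECLS and __END_DECLS are the <sys/cdefs.h> macros
that keep C linkage for the wrapped declarations when the header is
compiled as C++; roughly:

    #ifdef __cplusplus
    #define __BEGIN_DECLS extern "C" {
    #define __END_DECLS   }
    #else
    #define __BEGIN_DECLS
    #define __END_DECLS
    #endif

so removing them can break C code that includes this header.)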
Reason for revert: b/379796721

Change-Id: I34cdc69d5b8d11446da02106b7bd593a8c8151c3
---
 libprocessgroup/include/processgroup/processgroup.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/libprocessgroup/include/processgroup/processgroup.h b/libprocessgroup/include/processgroup/processgroup.h
index 6a026a717..d27b56895 100644
--- a/libprocessgroup/include/processgroup/processgroup.h
+++ b/libprocessgroup/include/processgroup/processgroup.h
@@ -16,6 +16,7 @@
 #pragma once
 
+#include <sys/cdefs.h>
 #include <sys/types.h>
 #include <unistd.h>
 
@@ -23,6 +24,8 @@
 #include <string_view>
 #include <vector>
 
+__BEGIN_DECLS
+
 static constexpr std::string CGROUPV2_HIERARCHY_NAME = "cgroup2";
 
 bool CgroupsAvailable();
@@ -36,6 +39,8 @@ bool SetTaskProfiles(pid_t tid, const std::vector<std::string>& profiles,
 bool SetProcessProfiles(uid_t uid, pid_t pid, const std::vector<std::string>& profiles);
 bool SetUserProfiles(uid_t uid, const std::vector<std::string>& profiles);
 
+__END_DECLS
+
 bool SetTaskProfiles(pid_t tid, std::initializer_list<std::string_view> profiles,
                      bool use_fd_cache = false);
 bool SetProcessProfiles(uid_t uid, pid_t pid, std::initializer_list<std::string_view> profiles);
@@ -45,6 +50,7 @@ bool SetTaskProfiles(pid_t tid, std::span<const std::string_view> profiles,
 bool SetProcessProfiles(uid_t uid, pid_t pid, std::span<const std::string_view> profiles);
 #endif
 
+__BEGIN_DECLS
 
 #ifndef __ANDROID_VNDK__
 
@@ -90,3 +96,5 @@ bool getAttributePathForTask(const std::string& attr_name, pid_t tid, std::string* path);
 bool isProfileValidForProcess(const std::string& profile_name, uid_t uid, pid_t pid);
 
 #endif // __ANDROID_VNDK__
+
+__END_DECLS

From ae8313f8e694eeb019211066229ace7720dbe304 Mon Sep 17 00:00:00 2001
From: Vikram Auradkar <auradkar@google.com>
Date: Mon, 11 Nov 2024 15:27:34 -0800
Subject: [PATCH 5/5] libprefetch: library to prefetch data using tracing.

1: This supports "mem" tracing only.
2: Replay option is modified to use "pread" instead of readahead.

A simple utility binary "prefetch" is built which links to the library.
The binary supports recording, replaying, and verifying the generated
metadata.

Bug: 362507272
Test: cargo test passes all unit tests.
Verify record, replay and dump options:
  ./prefetch record --duration 10 --path /data/test/trace-test
  ./prefetch replay --path /data/test/trace-test
  ./prefetch dump /data/test/trace-test --format csv

Change-Id: I1661e49183c6120d2878510e609571fe6d608bb5
Signed-off-by: Vikram Auradkar <auradkar@google.com>
Signed-off-by: Akilesh Kailash <akailash@google.com>
---
 init/libprefetch/prefetch/Android.bp        |  80 ++
 init/libprefetch/prefetch/Cargo.lock        | 743 ++++++++++++++
 init/libprefetch/prefetch/Cargo.toml        |  51 +
 init/libprefetch/prefetch/OWNERS            |   3 +
 init/libprefetch/prefetch/prefetch.rc       |  13 +
 init/libprefetch/prefetch/src/args.rs       | 108 ++
 .../prefetch/src/args/args_argh.rs          | 217 ++++
 init/libprefetch/prefetch/src/error.rs      | 187 ++++
 init/libprefetch/prefetch/src/format.rs     | 823 +++++++++++++++
 init/libprefetch/prefetch/src/lib.rs        | 186 ++++
 init/libprefetch/prefetch/src/main.rs       |  41 +
 init/libprefetch/prefetch/src/replay.rs     | 762 ++++++++++++++
 init/libprefetch/prefetch/src/tracer/mem.rs | 897 ++++++++++++++++
 init/libprefetch/prefetch/src/tracer/mod.rs | 965 ++++++++++++++++++
 14 files changed, 5076 insertions(+)
 create mode 100644 init/libprefetch/prefetch/Android.bp
 create mode 100644 init/libprefetch/prefetch/Cargo.lock
 create mode 100644 init/libprefetch/prefetch/Cargo.toml
 create mode 100644 init/libprefetch/prefetch/OWNERS
 create mode 100644 init/libprefetch/prefetch/prefetch.rc
 create mode 100644 init/libprefetch/prefetch/src/args.rs
 create mode 100644 init/libprefetch/prefetch/src/args/args_argh.rs
 create mode 100644 init/libprefetch/prefetch/src/error.rs
 create mode 100644 init/libprefetch/prefetch/src/format.rs
 create mode 100644 init/libprefetch/prefetch/src/lib.rs
 create mode 100644 init/libprefetch/prefetch/src/main.rs
 create mode 100644 init/libprefetch/prefetch/src/replay.rs
 create mode 100644 init/libprefetch/prefetch/src/tracer/mem.rs
 create mode 100644 init/libprefetch/prefetch/src/tracer/mod.rs

diff --git a/init/libprefetch/prefetch/Android.bp b/init/libprefetch/prefetch/Android.bp
new file mode 100644
index 000000000..778ea8a8c
--- /dev/null
+++ b/init/libprefetch/prefetch/Android.bp
@@ -0,0 +1,80 @@
+//
+// Copyright (C) 2024 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package {
+    default_team: "trendy_team_android_kernel",
+    default_applicable_licenses: ["Android-Apache-2.0"],
+}
+
+rust_library_rlib {
+    name: "libprefetch_rs",
+    crate_name: "prefetch_rs",
+    srcs: ["src/lib.rs"],
+    rustlibs: [
+        "libandroid_logger",
+        "libargh",
+        "libchrono",
+        "libcrc32fast",
+        "libcsv",
+        "liblibc",
+        "liblog_rust",
+        "liblru_cache",
+        "libnix",
+        "librand",
+        "librayon",
+        "libregex",
+        "libserde_cbor",
+        "libserde_json",
+        "libserde",
+        "libthiserror",
+        "libwalkdir",
+        "librustutils",
+    ],
+    prefer_rlib: true,
+    features: [
+        "derive",
+        "error-context",
+        "help",
+        "std",
+        "usage",
+        "use_argh",
+    ],
+}
+
+rust_binary {
+    name: "prefetch",
+    crate_name: "prefetch",
+    srcs: ["src/main.rs"],
+    rustlibs: [
+        "libprefetch_rs",
+        "liblog_rust",
+        "libandroid_logger",
+    ],
+    prefer_rlib: true,
+    features: [
+        "default",
+        "derive",
+        "error-context",
+        "help",
+        "std",
+        "usage",
+        "use_argh",
+    ],
+    init_rc: [
+        "prefetch.rc",
+    ],
+}
+
+// TODO: Add rust_test to enable unit testing - b/378554334
diff --git a/init/libprefetch/prefetch/Cargo.lock b/init/libprefetch/prefetch/Cargo.lock
new file mode 100644
index 000000000..d6b214d26
--- /dev/null
+++ b/init/libprefetch/prefetch/Cargo.lock
@@ -0,0 +1,743 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "aho-corasick"
+version = "0.7.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "android_log-sys"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "85965b6739a430150bdd138e2374a98af0c3ee0d030b3bb7fc3bddff58d0102e"
+
+[[package]]
+name = "android_logger"
+version = "0.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d9ed09b18365ed295d722d0b5ed59c01b79a826ff2d2a8f73d5ecca8e6fb2f66"
+dependencies = [
+ "android_log-sys",
+ "env_logger",
+ "lazy_static",
+ "log",
+]
+
+[[package]]
+name = "argh"
+version = "0.1.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ab257697eb9496bf75526f0217b5ed64636a9cfafa78b8365c71bd283fcef93e"
+dependencies = [
+ "argh_derive",
+ "argh_shared",
+]
+
+[[package]]
+name = "argh_derive"
+version = "0.1.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b382dbd3288e053331f03399e1db106c9fb0d8562ad62cb04859ae926f324fa6"
+dependencies = [
+ "argh_shared",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "argh_shared"
+version = "0.1.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5693f39141bda5760ecc4111ab08da40565d1771038c4a0250f03457ec707531"
+dependencies = [
+ "serde",
+]
+
+[[package]]
+name = "atty"
+version = "0.2.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
+dependencies = [
+ "hermit-abi 0.1.19",
+ "libc",
+ "winapi 0.3.9",
+]
+
+[[package]]
+name = "autocfg"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
+
+[[package]]
+name = "bincode"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b92615d57e4048e480bd7e3c2d7f6ec252819fffec95efbc30ec7c68744aa66c"
+dependencies = [
+ "byteorder",
+ "serde",
+]
+
+[[package]]
+name = "bitflags"
+version = "2.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
+
+[[package]]
+name = "bstr"
+version = "0.2.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a40b47ad93e1a5404e6c18dec46b628214fee441c70f4ab5d6942142cc268a3d"
+dependencies = [
+ "lazy_static",
+ "memchr",
+ "regex-automata",
+ "serde",
+]
+
+[[package]]
+name = "byteorder"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
+
+[[package]]
+name = "cfg-if"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
+
+[[package]]
+name = "cfg_aliases"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e"
+
+[[package]]
+name = "chrono"
+version = "0.4.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73"
+dependencies = [
+ "libc",
+ "num-integer",
+ "num-traits",
+ "serde",
+ "time",
+ "winapi 0.3.9",
+]
+
+[[package]]
+name = "crc32fast"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "crossbeam-channel"
+version = "0.5.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "176dc175b78f56c0f321911d9c8eb2b77a78a4860b9c19db83835fea1a46649b"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-deque"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d"
+dependencies = [
+ "crossbeam-epoch",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-epoch"
+version = "0.9.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-utils"
+version = "0.8.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345"
+
+[[package]]
+name = "csv"
+version = "1.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1"
+dependencies = [
+ "bstr",
+ "csv-core",
+ "itoa",
+ "ryu",
+ "serde",
+]
+
+[[package]]
+name = "csv-core"
+version = "0.1.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "either"
+version = "1.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
+
+[[package]]
+name = "env_logger"
+version = "0.8.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3"
+dependencies = [
+ "atty",
+ "humantime",
+ "log",
+ "regex",
+ "termcolor",
+]
+
+[[package]]
+name = "fuchsia-cprng"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba"
+
+[[package]]
+name = "getrandom"
+version = "0.2.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "wasi 0.11.0+wasi-snapshot-preview1",
+]
+
+[[package]]
+name = "half"
+version = "1.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7"
+
+[[package]]
+name = "hermit-abi"
+version = "0.1.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "hermit-abi"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5d3d0e0f38255e7fa3cf31335b3a56f05febd18025f4db5ef7a0cfb4f8da651f"
+
+[[package]]
+name = "humantime"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
+
+[[package]]
+name = "itoa"
+version = "0.4.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4"
+
+[[package]]
+name = "kernel32-sys"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
+dependencies = [
+ "winapi 0.2.8",
+ "winapi-build",
+]
+
+[[package]]
+name = "lazy_static"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
+
+[[package]]
+name = "libc"
+version = "0.2.162"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "18d287de67fe55fd7e1581fe933d965a5a9477b38e949cfa9f8574ef01506398"
+
+[[package]]
+name = "linked-hash-map"
+version = "0.5.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f"
+
+[[package]]
+name = "log"
+version = "0.4.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "lru-cache"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "31e24f1ad8321ca0e8a1e0ac13f23cb668e6f5466c2c57319f6a5cf1cc8e3b1c"
+dependencies = [
+ "linked-hash-map",
+]
+
+[[package]]
+name = "memchr"
+version = "2.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525"
+
+[[package]]
+name = "nix"
+version = "0.28.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4"
+dependencies = [
+ "bitflags",
+ "cfg-if",
+ "cfg_aliases",
+ "libc",
+]
+
+[[package]]
+name = "num-integer"
+version = "0.1.45"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9"
+dependencies = [
+ "autocfg",
+ "num-traits",
+]
+
+[[package]]
+name = "num-traits"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "num_cpus"
+version = "1.16.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
+dependencies = [
+ "hermit-abi 0.3.4",
+ "libc",
+]
+
+[[package]]
+name = "ppv-lite86"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
+
+[[package]]
+name = "prefetch"
+version = "0.1.0"
+dependencies = [
+ "android_logger",
+ "argh",
+ "bincode",
+ "chrono",
+ "crc32fast",
+ "csv",
+ "env_logger",
+ "libc",
+ "log",
+ "lru-cache",
+ "memchr",
+ "nix",
+ "proc-macro2",
+ "quote",
+ "rand 0.8.5",
+ "rayon",
+ "rayon-core",
+ "regex",
+ "serde",
+ "serde_cbor",
+ "serde_derive",
+ "serde_json",
+ "tempfile",
+ "thiserror",
+ "thiserror-impl",
+ "walkdir",
+]
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.26"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a152013215dca273577e18d2bf00fa862b89b24169fb78c4c95aeb07992c9cec"
+dependencies = [
+ "unicode-xid",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "rand"
+version = "0.3.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "64ac302d8f83c0c1974bf758f6b041c6c8ada916fbb44a609158ca8b064cc76c"
+dependencies = [
+ "libc",
+ "rand 0.4.6",
+]
+
+[[package]]
+name = "rand"
+version = "0.4.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293"
+dependencies = [
+ "fuchsia-cprng",
+ "libc",
+ "rand_core 0.3.1",
+ "rdrand",
+ "winapi 0.3.9",
+]
+
+[[package]]
+name = "rand"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
+dependencies = [
+ "libc",
+ "rand_chacha",
+ "rand_core 0.6.4",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
+dependencies = [
+ "ppv-lite86",
+ "rand_core 0.6.4",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b"
+dependencies = [
+ "rand_core 0.4.2",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc"
+
+[[package]]
+name = "rand_core"
+version = "0.6.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
+dependencies = [
+ "getrandom",
+]
+
+[[package]]
+name = "rayon"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8b0d8e0819fadc20c74ea8373106ead0600e3a67ef1fe8da56e39b9ae7275674"
+dependencies = [
+ "autocfg",
"crossbeam-deque", + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ab346ac5921dc62ffa9f89b7a773907511cdfa5490c572ae9be1be33e8afa4a" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-utils", + "lazy_static", + "num_cpus", +] + +[[package]] +name = "rdrand" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" +dependencies = [ + "rand_core 0.3.1", +] + +[[package]] +name = "redox_syscall" +version = "0.1.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce" + +[[package]] +name = "regex" +version = "1.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a26af418b574bd56588335b3a3659a65725d4e636eb1016c2f9e3b38c7cc759" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" + +[[package]] +name = "regex-syntax" +version = "0.6.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + +[[package]] +name = "ryu" +version = "1.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "serde" +version = "1.0.123" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92d5161132722baa40d802cc70b15262b98258453e85e5d1d365c757c73869ae" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_cbor" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5" +dependencies = [ + "half", + "serde", +] + +[[package]] +name = "serde_derive" +version = "1.0.123" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9391c295d64fc0abb2c556bad848f33cb8296276b1ad2677d1ae1ace4f258f31" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.62" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea1c6153794552ea7cf7cf63b1231a25de00ec90db326ba6264440fa08e31486" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "syn" +version = "1.0.80" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d010a1623fbd906d51d650a9916aaefc05ffa0e4053ff7fe601167f3e715d194" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "tempfile" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11ce2fe9db64b842314052e2421ac61a73ce41b898dc8e3750398b219c5fc1e0" +dependencies = [ + "kernel32-sys", + "libc", + "rand 0.3.23", + "redox_syscall", + "winapi 0.2.8", +] + +[[package]] +name = "termcolor" +version = "1.4.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "thiserror" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0f4a65597094d4483ddaed134f409b2cb7c1beccf25201a9f73c719254fa98e" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7765189610d8241a44529806d6fd1f2e0a08734313a35d5b3a556f92b381f3c0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "time" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a" +dependencies = [ + "libc", + "wasi 0.10.0+wasi-snapshot-preview1", + "winapi 0.3.9", +] + +[[package]] +name = "unicode-xid" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" + +[[package]] +name = "walkdir" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "wasi" +version = "0.10.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "winapi" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-build" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" +dependencies = [ + "winapi 0.3.9", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/init/libprefetch/prefetch/Cargo.toml b/init/libprefetch/prefetch/Cargo.toml new file mode 100644 index 000000000..7da4fc68b --- /dev/null +++ b/init/libprefetch/prefetch/Cargo.toml @@ -0,0 +1,51 @@ +[package] +name = "prefetch" +version = "0.1.0" +edition = "2018" +default-run = "prefetch" + +[lib] +name = "prefetch_rs" +path = 
"src/lib.rs" + +[[bin]] +name = "prefetch" +path = "src/main.rs" + +[features] +default = ["use_argh"] +use_argh = ["argh"] + +[dependencies] +argh = { version = "0.1.10", optional = true } +chrono = { version = "=0.4.19", features = ["serde"] } +crc32fast = "1.2.1" +csv = "=1.1.6" +libc = "0.2.82" +log = "=0.4.14" +lru-cache = "0.1.2" +memchr = "=2.3.4" +nix = {version = "0.28", features = ["fs", "time", "feature", "mman", "uio"]} +proc-macro2 = "=1.0.26" +quote = "=1.0.9" +rand = "0.8.3" +rayon = "=1.5.0" +rayon-core = "=1.9.0" +regex = "1.4.5" +serde = { version = "*", features = ["derive"] } +serde_cbor = "0.11.2" +serde_derive = "=1.0.123" +serde_json = "=1.0.62" +thiserror = "=1.0.24" +thiserror-impl = "1.0.24" +walkdir = "2.3.2" + +# crates required for android builds +[target.'cfg(target_os = "android")'.dependencies] +android_logger = "0.10.1" + +# crates not present in android builds +[target.'cfg(not(target_os = "android"))'.dependencies] +bincode = "=0.9.0" +env_logger = "=0.8.4" +tempfile = "2.2.0" diff --git a/init/libprefetch/prefetch/OWNERS b/init/libprefetch/prefetch/OWNERS new file mode 100644 index 000000000..a1b54bf5c --- /dev/null +++ b/init/libprefetch/prefetch/OWNERS @@ -0,0 +1,3 @@ +akailash@google.com +auradkar@google.com +takayas@google.com diff --git a/init/libprefetch/prefetch/prefetch.rc b/init/libprefetch/prefetch/prefetch.rc new file mode 100644 index 000000000..9f2cb7f46 --- /dev/null +++ b/init/libprefetch/prefetch/prefetch.rc @@ -0,0 +1,13 @@ +service prefetch_record /system/bin/prefetch record --duration ${ro.prefetch_boot.duration_s:-0} + class main + user root + group root system + disabled + oneshot + +service prefetch_replay /system/bin/prefetch replay --io-depth ${ro.prefetch_boot.io_depth:-2} --max-fds ${ro.prefetch_boot.max_fds:-128} + class main + user root + group root system + disabled + oneshot diff --git a/init/libprefetch/prefetch/src/args.rs b/init/libprefetch/prefetch/src/args.rs new file mode 100644 index 000000000..4c1e68919 --- /dev/null +++ b/init/libprefetch/prefetch/src/args.rs @@ -0,0 +1,108 @@ +// Copyright (C) 2024 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +pub(crate) static DEFAULT_IO_DEPTH: u16 = 2; +pub(crate) static DEFAULT_MAX_FDS: u16 = 128; +pub(crate) static DEFAULT_EXIT_ON_ERROR: bool = false; + +mod args_argh; +use args_argh as args_internal; + +use std::path::Path; +use std::path::PathBuf; +use std::process::exit; + +pub use args_internal::OutputFormat; +pub use args_internal::ReplayArgs; +pub use args_internal::TracerType; +pub use args_internal::{DumpArgs, MainArgs, RecordArgs, SubCommands}; +use serde::Deserialize; +use serde::Serialize; + +use crate::Error; +use log::error; + +// Deserialized form of the config file +#[derive(Clone, Debug, Default, Deserialize, Serialize, Eq, PartialEq)] +pub struct ConfigFile { + // Files to be excluded in prefetch. These files might have been + // added in the record file while recording,but we do not want to + // replay these files. 
+    // These can be two types of files:
+    // 1) installation-specific files (e.g. files in /data) and
+    // 2) large files which we do not want to load in replay (e.g. APK files).
+    pub files_to_exclude_regex: Vec<String>,
+    // Files that are not in the record file, but need to be loaded during replay
+    pub additional_replay_files: Vec<String>,
+}
+
+fn verify_and_fix(args: &mut MainArgs) -> Result<(), Error> {
+    match &mut args.nested {
+        SubCommands::Record(arg) => {
+            if arg.debug && arg.int_path.is_none() {
+                arg.int_path = Some(PathBuf::from(format!("{}.int", arg.path.to_str().unwrap())));
+            }
+
+            if let Some(p) = &arg.int_path {
+                ensure_path_doesnt_exist(p)?;
+            }
+        }
+        SubCommands::Replay(arg) => {
+            ensure_path_exists(&arg.path)?;
+            if !arg.config_path.as_os_str().is_empty() {
+                ensure_path_exists(&arg.config_path)?;
+            }
+        }
+        SubCommands::Dump(arg) => {
+            ensure_path_exists(&arg.path)?;
+        }
+    }
+    Ok(())
+}
+
+/// Returns error if the given path at `p` exists.
+pub(crate) fn ensure_path_doesnt_exist(p: &Path) -> Result<(), Error> {
+    if p.exists() {
+        Err(Error::InvalidArgs {
+            arg_name: "path".to_string(),
+            arg_value: p.display().to_string(),
+            error: "Path already exists".to_string(),
+        })
+    } else {
+        Ok(())
+    }
+}
+
+/// Returns error if the given path at `p` doesn't exist.
+pub(crate) fn ensure_path_exists(p: &Path) -> Result<(), Error> {
+    if p.is_file() {
+        Ok(())
+    } else {
+        Err(Error::InvalidArgs {
+            arg_name: "path".to_string(),
+            arg_value: p.display().to_string(),
+            error: "Path does not exist".to_string(),
+        })
+    }
+}
+
+/// Builds `MainArgs` from command line arguments. On error prints error/help message
+/// and exits.
+pub fn args_from_env() -> MainArgs {
+    let mut args = args_internal::args_from_env();
+    if let Err(e) = verify_and_fix(&mut args) {
+        error!("failed to verify args: {}", e);
+        exit(1);
+    }
+    args
+}
diff --git a/init/libprefetch/prefetch/src/args/args_argh.rs b/init/libprefetch/prefetch/src/args/args_argh.rs
new file mode 100644
index 000000000..8ac95fce7
--- /dev/null
+++ b/init/libprefetch/prefetch/src/args/args_argh.rs
@@ -0,0 +1,217 @@
+// Copyright (C) 2024 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::{option::Option, path::PathBuf, result::Result::Ok, str::FromStr};
+
+use argh::FromArgs;
+use serde::Deserialize;
+
+use crate::args::DEFAULT_EXIT_ON_ERROR;
+use crate::args::DEFAULT_IO_DEPTH;
+use crate::args::DEFAULT_MAX_FDS;
+use crate::Error;
+
+/// prefetch-rs
+#[derive(Eq, PartialEq, Debug, Default, FromArgs)]
+pub struct MainArgs {
+    /// subcommands
+    #[argh(subcommand)]
+    pub nested: SubCommands,
+}
+
+/// Sub commands for prefetch functions
+#[derive(Eq, PartialEq, Debug, FromArgs)]
+#[argh(subcommand)]
+pub enum SubCommands {
+    /// Records prefetch data.
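+    ///
+    /// Example invocation (taken from the Test: section of this commit
+    /// message):
+    /// `prefetch record --duration 10 --path /data/test/trace-test`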
+    Record(RecordArgs),
+    /// Replays from prefetch data
+    Replay(ReplayArgs),
+    /// Dump prefetch data in human readable format
+    Dump(DumpArgs),
+}
+
+impl Default for SubCommands {
+    fn default() -> Self {
+        Self::Dump(DumpArgs::default())
+    }
+}
+
+fn default_path() -> PathBuf {
+    PathBuf::from("/metadata/prefetch/prefetch.pack")
+}
+
+fn parse_tracing_instance(value: &str) -> Result<Option<String>, String> {
+    Ok(Some(value.to_string()))
+}
+
+#[derive(Eq, PartialEq, Debug, Default, FromArgs)]
+/// Records prefetch data.
+#[argh(subcommand, name = "record")]
+pub struct RecordArgs {
+    /// duration in seconds to record the data
+    ///
+    /// On Android, if duration count is set to zero, recording
+    /// will continue until the property sys.boot_completed = 1.
+    #[argh(option)]
+    pub duration: u16,
+
+    /// file path where the records will be written to
+    ///
+    /// A new file is created at the given path. If the path exists, it
+    /// will be overwritten.
+    #[argh(option, default = "default_path()")]
+    pub path: PathBuf,
+
+    /// when set, an intermediate file will be created that provides more
+    /// information about collected data.
+    #[argh(option, default = "false")]
+    pub debug: bool,
+
+    /// file path where the intermediate file will be written to
+    ///
+    /// A new file is created at the given path. Errors out if the file
+    /// already exists.
+    #[argh(option)]
+    pub int_path: Option<PathBuf>,
+
+    /// size of the trace buffer which holds trace events. We need a larger
+    /// buffer on a system that has faster disks or has a large number of
+    /// events enabled. Defaults to TRACE_BUFFER_SIZE_KIB KiB.
+    #[argh(option, long = "trace-buffer-size")]
+    pub trace_buffer_size_kib: Option<u64>,
+
+    /// trace subsystem to use. "mem" subsystem is set by default.
+    #[argh(option, default = "Default::default()")]
+    pub tracing_subsystem: TracerType,
+
+    /// if true, enables all the needed trace events and, at the end, restores
+    /// the values of those events.
+    /// If false, assumes that the user has set up the needed trace events.
+    #[argh(option, default = "true")]
+    pub setup_tracing: bool,
+
+    /// if specified, works on a tracing instance (like
+    /// /sys/kernel/tracing/instances/my_instance) rather than using the
+    /// shared global instance (i.e. /sys/kernel/tracing).
+    #[argh(
+        option,
+        default = "Some(\"prefetch\".to_string())",
+        from_str_fn(parse_tracing_instance)
+    )]
+    pub tracing_instance: Option<String>,
+}
+
+/// Type of tracing subsystem to use.
+#[derive(Deserialize, Clone, Eq, PartialEq, Debug)]
+pub enum TracerType {
+    /// mem tracing subsystem relies on when a file's in-memory page gets added to the fs cache.
+    Mem,
+}
+
+impl FromStr for TracerType {
+    type Err = Error;
+    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
+        Ok(match s.to_lowercase().as_str() {
+            "mem" => Self::Mem,
+            _ => {
+                return Err(Error::InvalidArgs {
+                    arg_name: "tracing_subsystem".to_owned(),
+                    arg_value: s.to_owned(),
+                    error: "unknown value".to_owned(),
+                })
+            }
+        })
+    }
+}
+
+impl Default for TracerType {
+    fn default() -> Self {
+        Self::Mem
+    }
+}
+
+#[derive(Eq, PartialEq, Debug, Default, FromArgs)]
+/// Prefetch data from the recorded file.
+#[argh(subcommand, name = "replay")]
+pub struct ReplayArgs {
+    /// file path from where the records will be read
+    #[argh(option, default = "default_path()")]
+    pub path: PathBuf,
+
+    /// IO depth. Number of IOs that can go in parallel.
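+    ///
+    /// For example, `--io-depth 2` (the default, DEFAULT_IO_DEPTH) keeps up
+    /// to two `pread` requests in flight at a time during replay.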
+    #[argh(option, long = "io-depth", default = "DEFAULT_IO_DEPTH")]
+    pub io_depth: u16,
+
+    /// max number of open fds to cache
+    #[argh(option, arg_name = "max-fds", default = "DEFAULT_MAX_FDS")]
+    pub max_fds: u16,
+
+    /// if true, command exits on encountering any error.
+    ///
+    /// This defaults to false as there is no harm in prefetching if we
+    /// encounter non-fatal errors.
+    #[argh(option, default = "DEFAULT_EXIT_ON_ERROR")]
+    pub exit_on_error: bool,
+
+    /// file path from where the prefetch config file will be read
+    #[argh(option, default = "PathBuf::new()")]
+    pub config_path: PathBuf,
+}
+
+/// dump records file in given format
+#[derive(Eq, PartialEq, Debug, Default, FromArgs)]
+#[argh(subcommand, name = "dump")]
+pub struct DumpArgs {
+    /// file path from where the records will be read
+    #[argh(option)]
+    pub path: PathBuf,
+    /// output format. One of json or csv.
+    /// Note: in csv format, a few fields are excluded from the output.
+    #[argh(option)]
+    pub format: OutputFormat,
+}
+
+#[derive(Deserialize, Eq, PartialEq, Debug)]
+pub enum OutputFormat {
+    Json,
+    Csv,
+}
+
+impl FromStr for OutputFormat {
+    type Err = Error;
+    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
+        Ok(match s.to_lowercase().as_str() {
+            "csv" => Self::Csv,
+            "json" => Self::Json,
+            _ => {
+                return Err(Error::InvalidArgs {
+                    arg_name: "format".to_owned(),
+                    arg_value: s.to_owned(),
+                    error: "unknown value".to_owned(),
+                })
+            }
+        })
+    }
+}
+
+impl Default for OutputFormat {
+    fn default() -> Self {
+        Self::Json
+    }
+}
+
+/// Build args struct from command line arguments
+pub fn args_from_env() -> MainArgs {
+    argh::from_env()
+}
diff --git a/init/libprefetch/prefetch/src/error.rs b/init/libprefetch/prefetch/src/error.rs
new file mode 100644
index 000000000..8dd938a7e
--- /dev/null
+++ b/init/libprefetch/prefetch/src/error.rs
@@ -0,0 +1,187 @@
+// Copyright (C) 2024 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use thiserror::Error;
+
+use crate::{format::FileId, InodeInfo};
+
+/// Enumerates all possible errors returned by this library.
+#[derive(Debug, Error)]
+pub enum Error {
+    /// Represents a failure to open a file.
+    #[error("Open error: {path}: {source}")]
+    Open {
+        /// The IO error
+        source: std::io::Error,
+        /// Path on which the operation failed.
+        path: String,
+    },
+
+    /// Represents a failure to create a file.
+    #[error("Create error. {path} {source}")]
+    Create {
+        /// The IO error
+        source: std::io::Error,
+        /// Path on which the operation failed.
+        path: String,
+    },
+
+    /// Represents a failure to read trace file.
+    #[error("Read error. {error}")]
+    Read {
+        /// Detailed error message.
+        error: String,
+    },
+
+    /// Represents a failure to write to a file.
+    #[error("Write error. {source}")]
+    Write {
+        /// The IO error
+        source: std::io::Error,
+
+        /// file path
+        path: String,
+    },
+
+    /// Represents a failure to delete a file.
+    #[error("Delete error. {path} {source}")]
+    Delete {
+        /// The IO error
+        source: std::io::Error,
+        /// Path on which the operation failed.
+        path: String,
+    },
+
+    /// Represents a failure to stat a file.
+    #[error("Stat error. {path} {source}")]
+    Stat {
+        /// The IO error
+        source: std::io::Error,
+        /// Path on which the operation failed.
+        path: String,
+    },
+
+    /// Represents a failure to clone a file.
+    #[error("clone failed. {id} {source}")]
+    FileClone {
+        /// The IO error
+        source: std::io::Error,
+        /// File id for which we could not clone the file.
+        id: FileId,
+    },
+
+    /// Represents a failure to mmap a file.
+    #[error("mmap failed. {path} {error}")]
+    Mmap {
+        /// Detailed error message.
+        error: String,
+        /// Path on which the operation failed.
+        path: String,
+    },
+
+    /// Represents a failure to munmap a file.
+    #[error("munmap failed. {length} {error}")]
+    Munmap {
+        /// Detailed error message.
+        error: String,
+        /// Size of the mapping for which munmap failed.
+        length: usize,
+    },
+
+    /// Represents all other cases of `std::io::Error`.
+    ///
+    #[error(transparent)]
+    IoError(
+        /// The IO error
+        #[from]
+        std::io::Error,
+    ),
+
+    /// Represents a failure to map FileId to path
+    ///
+    #[error("Failed to map id to path: {id}")]
+    IdNoFound {
+        /// File id for which path lookup failed.
+        id: FileId,
+    },
+
+    /// Indicates that the file is skipped for prefetching
+    /// because it is in the exclude files list.
+    ///
+    #[error("Skipped prefetching file from path: {path}")]
+    SkipPrefetch {
+        /// Path to file for which prefetching is skipped.
+        path: String,
+    },
+
+    /// Represents spurious InodeInfo or missing Record.
+    ///
+    #[error(
+        "Stale inode(s) info found.\n\
+        missing_file_ids: {missing_file_ids:#?}\n\
+        stale_inodes: {stale_inodes:#?} \n\
+        missing_paths:{missing_paths:#?}"
+    )]
+    StaleInode {
+        /// FileIds for which InodeInfo is missing.
+        missing_file_ids: Vec<FileId>,
+
+        /// InodeInfos for which no records exist.
+        stale_inodes: Vec<InodeInfo>,
+
+        /// InodeInfos in which no paths were found.
+        missing_paths: Vec<InodeInfo>,
+    },
+
+    /// Represents a failure to serialize records file.
+    #[error("Serialize error: {error}")]
+    Serialize {
+        /// Detailed error message.
+        error: String,
+    },
+
+    /// Represents a failure to deserialize records file.
+    #[error("Deserialize error: {error}")]
+    Deserialize {
+        /// Detailed error message.
+        error: String,
+    },
+
+    /// Represents a failure from thread pool.
+    #[error("Thread pool error: {error}")]
+    ThreadPool {
+        /// Detailed error message.
+        error: String,
+    },
+
+    /// Represents a failure to setup file.
+    #[error("Failed to setup prefetch: {error}")]
+    Custom {
+        /// Detailed error message.
+        error: String,
+    },
+
+    /// Represents a failure to parse args.
+    #[error("Failed to parse arg:{arg_name} value:{arg_value} error:{error}")]
+    InvalidArgs {
+        /// Arg name.
+        arg_name: String,
+
+        /// Arg value.
+        arg_value: String,
+
+        /// Detailed error message.
+        error: String,
+    },
+}
diff --git a/init/libprefetch/prefetch/src/format.rs b/init/libprefetch/prefetch/src/format.rs
new file mode 100644
index 000000000..ac89a74eb
--- /dev/null
+++ b/init/libprefetch/prefetch/src/format.rs
@@ -0,0 +1,823 @@
+// Copyright (C) 2024 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::cmp::{max, min};
+use std::collections::{BTreeMap, HashMap, HashSet};
+use std::fmt;
+use std::fmt::Display;
+use std::fs::{File, Metadata, OpenOptions};
+use std::hash::Hash;
+use std::io::Write;
+use std::ops::{Deref, DerefMut};
+use std::os::unix::fs::MetadataExt;
+use std::time::SystemTime;
+
+use crc32fast::Hasher;
+use log::debug;
+use regex::Regex;
+use serde::Deserializer;
+use serde::Serialize;
+use serde::{Deserialize, Serializer};
+
+use crate::error::Error;
+
+static MAGIC_UUID: [u8; 16] = [
+    0x10, 0x54, 0x3c, 0xb8, 0x60, 0xdb, 0x49, 0x45, 0xa1, 0xd5, 0xde, 0xa7, 0xd2, 0x3b, 0x05, 0x49,
+];
+static MAJOR_VERSION: u16 = 0;
+static MINOR_VERSION: u16 = 1;
+
+/// Represents inode number which is unique within a filesystem.
+pub(crate) type InodeNumber = u64;
+
+/// Represents device number which is unique for given block device.
+pub(crate) type DeviceNumber = u64;
+
+/// Convenience name for string that represents a path.
+pub(crate) type PathString = String;
+
+/// Represents unique file id across filesystems.
+#[derive(Clone, Debug, Deserialize, Eq, Hash, Default, PartialEq, PartialOrd, Ord, Serialize)]
+pub struct FileId(pub u64);
+
+impl Display for FileId {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        self.0.fmt(f)
+    }
+}
+
+fn serialize_hashmap<S, K: Ord + Serialize + Clone + Hash + PartialEq, V: Serialize + Clone>(
+    value: &HashMap<K, V>,
+    serializer: S,
+) -> Result<S::Ok, S::Error>
+where
+    S: Serializer,
+{
+    let mut btree = BTreeMap::new();
+    for (k, v) in value {
+        btree.insert(k.clone(), v.clone());
+    }
+    btree.serialize(serializer)
+}
+
+#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)]
+pub(crate) struct SerializableHashMap<
+    K: Ord + Serialize + Clone + Hash + PartialEq,
+    V: Serialize + Clone,
+> {
+    #[serde(serialize_with = "serialize_hashmap")]
+    pub map: HashMap<K, V>,
+}
+
+impl<K, V> Deref for SerializableHashMap<K, V>
+where
+    K: Ord + Serialize + Clone + Hash + PartialEq,
+    V: Serialize + Clone,
+{
+    type Target = HashMap<K, V>;
+    fn deref(&self) -> &Self::Target {
+        &self.map
+    }
+}
+
+impl<K, V> DerefMut for SerializableHashMap<K, V>
+where
+    K: Ord + Serialize + Clone + Hash + PartialEq,
+    V: Serialize + Clone,
+{
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.map
+    }
+}
+
+/// The InodeInfo is unique per (device, inode) combination. It is
+/// used to verify that we are prefetching a file for which we
+/// generated the records.
+/// `Record` refers to this information with a unique `FileId`.
+#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)]
+pub struct InodeInfo {
+    // Inode number of the file.
+    pub(crate) inode_number: InodeNumber,
+
+    // File size in bytes.
+    pub(crate) file_size: u64,
+
+    // Helps to get to a file from a Record. The field is used to get to the file
+    // that needs to be prefetched.
+    //
+    // This struct is built by getting data from trace lines and querying filesystem
+    // for other fields about the file/inode.
+    //
+    // One instance per file to be prefetched. A file/inode can have multiple paths.
+    // We store multiple paths so that we can still get to it if some of the
+    // paths get deleted.
+    //
+    // See comments for `Record`.
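+    // (For example, a hard-linked file has a single inode but two paths, so
+    // one InodeInfo may carry both of them.)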
+    #[serde(deserialize_with = "check_inode_info_paths")]
+    pub(crate) paths: Vec<PathString>,
+
+    // Block device number on which the file is located.
+    pub(crate) device_number: DeviceNumber,
+}
+
+impl InodeInfo {
+    /// Returns InodeInfo.
+    pub fn new(
+        inode_number: InodeNumber,
+        file_size: u64,
+        paths: Vec<PathString>,
+        device_number: DeviceNumber,
+    ) -> Self {
+        Self { inode_number, file_size, paths, device_number }
+    }
+}
+
+// Helps us check block alignment.
+//
+// A records file can have multiple FsInfos.
+#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)]
+pub struct FsInfo {
+    // This is the filesystem block size and is not the underlying device's block size.
+    pub(crate) block_size: u64,
+}
+
+/// Prefetch record.
+/// Each record translates to one filesystem `read()` request.
+///
+/// Tracer builds `Record` by parsing trace lines or by querying filesystem.
+///
+/// Multiple `Record`s can belong to a single InodeInfo. For example if there were two
+/// reads for file `/data/my.apk` which is assigned FileId 10 at offsets 0 and 8k of length
+/// 1 byte each then we will have two `Records` in `RecordsFile` that look like
+/// `Record {file_id: 10, offset: 0, length: 1, timestamp: t1}`
+/// `Record {file_id: 10, offset: 8192, length: 1, timestamp: t2}`
+#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)]
+pub struct Record {
+    /// Points to the file that should be fetched. file_id is unique per `InodeInfo`
+    /// in a `RecordsFile`.
+    pub file_id: FileId,
+
+    /// start offset to fetch data from. This is FsInfo.block_size aligned.
+    pub offset: u64,
+
+    /// length of the read. This is generally rounded up to FsInfo.block_size
+    /// except when the rounding up crosses `InodeInfo.file_size`.
+    pub length: u64,
+
+    /// Timestamp in nanoseconds since the start when the data was loaded.
+    pub timestamp: u64,
+}
+
+impl Record {
+    /// Returns a new record if two records belong to same file and overlap.
+    fn overlaps(&self, other: &Self) -> Option<Self> {
+        if self.file_id == other.file_id {
+            let self_start = self.offset;
+            let self_end = self.offset + self.length;
+            let other_start = other.offset;
+            let other_end = other.offset + other.length;
+
+            if (self_start <= other_end) && (self_end >= other_start) {
+                let offset = min(self_start, other_start);
+                let length = max(self_end, other_end) - offset;
+                return Some(Self {
+                    file_id: self.file_id.clone(),
+                    offset,
+                    length,
+                    timestamp: min(self.timestamp, other.timestamp),
+                });
+            }
+        }
+        None
+    }
+}
+
+fn group_record_by_file_id(records: Vec<Record>) -> Vec<Record> {
+    let mut map: HashMap<FileId, BTreeMap<u64, Record>> = HashMap::new();
+
+    for record in &records {
+        let recs = map.entry(record.file_id.clone()).or_default();
+        recs.entry(record.offset).or_insert_with(|| record.clone());
+    }
+
+    let mut grouped = vec![];
+    for record in &records {
+        if let Some(inode) = map.get(&record.file_id) {
+            for rec in inode.values() {
+                grouped.push(rec.clone());
+            }
+        }
+        let _ = map.remove(&record.file_id);
+    }
+
+    grouped
+}
+
+/// When records are coalesced, because their file ids match and IO offsets overlap, the least
+/// timestamp of the coalesced records is retained.
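+///
+/// For example, `Record { file_id: 1, offset: 2, length: 3, timestamp: 4 }`
+/// and `Record { file_id: 1, offset: 5, length: 3, timestamp: 5 }` coalesce
+/// into `Record { file_id: 1, offset: 2, length: 6, timestamp: 4 }` (see
+/// `test_coalesced_without_group` below).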
+pub(crate) fn coalesce_records(records: Vec<Record>, group_by_file_id: bool) -> Vec<Record> {
+    let records = if group_by_file_id { group_record_by_file_id(records) } else { records };
+
+    let mut coalesced = vec![];
+    let mut current: Option<Record> = None;
+    for r in records {
+        current = match current {
+            None => Some(r),
+            Some(c) => {
+                let merged = c.overlaps(&r);
+                match merged {
+                    None => {
+                        coalesced.push(c);
+                        Some(r)
+                    }
+                    Some(m) => Some(m),
+                }
+            }
+        }
+    }
+    if let Some(r) = current {
+        coalesced.push(r);
+    }
+    coalesced
+}
+
+// Records file header.
+#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
+pub struct Header {
+    /// magic number as uuid to identify the header/format.
+    #[serde(deserialize_with = "check_magic")]
+    magic: [u8; 16],
+
+    // major version number.
+    #[serde(deserialize_with = "check_major_number")]
+    major_number: u16,
+
+    // minor version number.
+    #[serde(deserialize_with = "check_minor_number")]
+    minor_number: u16,
+
+    /// timestamp when the records file was generated.
+    date: SystemTime,
+
+    /// Checksum of the `RecordsFile` with `digest` being empty vector.
+    digest: u32,
+}
+
+fn check_version_number<'de, D>(
+    deserializer: D,
+    expected: u16,
+    version_type: &str,
+) -> Result<u16, D::Error>
+where
+    D: Deserializer<'de>,
+{
+    let found = u16::deserialize(deserializer)?;
+    if expected != found {
+        return Err(serde::de::Error::custom(format!(
+            "Failed to parse {} version. Expected: {} Found: {}",
+            version_type, expected, found
+        )));
+    }
+    Ok(found)
+}
+
+fn check_major_number<'de, D>(deserializer: D) -> Result<u16, D::Error>
+where
+    D: Deserializer<'de>,
+{
+    check_version_number(deserializer, MAJOR_VERSION, "major")
+}
+
+fn check_minor_number<'de, D>(deserializer: D) -> Result<u16, D::Error>
+where
+    D: Deserializer<'de>,
+{
+    check_version_number(deserializer, MINOR_VERSION, "minor")
+}
+
+fn check_magic<'de, D>(deserializer: D) -> Result<[u8; 16], D::Error>
+where
+    D: Deserializer<'de>,
+{
+    let found: [u8; 16] = <[u8; 16]>::deserialize(deserializer)?;
+    if found != MAGIC_UUID {
+        return Err(serde::de::Error::custom(format!(
+            "Failed to parse magic number. Expected: {:?} Found: {:?}",
+            MAGIC_UUID, found
+        )));
+    }
+    Ok(found)
+}
+
+fn check_inode_info_paths<'de, D>(deserializer: D) -> Result<Vec<PathString>, D::Error>
+where
+    D: Deserializer<'de>,
+{
+    let parsed: Vec<PathString> = Vec::deserialize(deserializer)?;
+    if parsed.is_empty() {
+        return Err(serde::de::Error::custom("No paths found in InodeInfo"));
+    }
+    Ok(parsed)
+}
+
+// Helper inner struct of RecordsFile meant to verify checksum.
+#[derive(Clone, Debug, Default, Deserialize, Serialize, Eq, PartialEq)]
+pub(crate) struct RecordsFileInner {
+    // One instance per mounted block device.
+    pub(crate) filesystems: SerializableHashMap<DeviceNumber, FsInfo>,
+
+    /// Helps to get to a file path from a given `FileId`.
+    /// One instance per file to be prefetched.
+    pub(crate) inode_map: SerializableHashMap<FileId, InodeInfo>,
+
+    /// Helps to get to a file and offset to be replayed.
+    //
+    // The records are chronologically arranged, meaning the data that is
+    // needed first is at the beginning of the vector and the data that is
+    // needed last is at the end.
+    //
+    // One instance per part of the file that needs to be prefetched.
+    pub records: Vec<Record>,
+}
+
+/// Deserialized form of records file.
+#[derive(Clone, Debug, Default, Deserialize, Serialize, Eq, PartialEq)]
+#[serde(remote = "Self")]
+pub struct RecordsFile {
+    /// Helps the prefetch tool to parse rest of the file.
+    pub header: Header,
+
+    /// Helps the prefetch tool to verify checksum.
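+    ///
+    /// (The `digest` in `header` is a crc32 computed over the CBOR
+    /// serialization of this structure with `header.digest` zeroed; see
+    /// `compute_digest` below.)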
+    pub(crate) inner: RecordsFileInner,
+}
+
+impl RecordsFile {
+    /// Given file id, looks up path of the file and returns open File handle.
+    pub fn open_file(&self, id: FileId, exclude_files_regex: &[Regex]) -> Result<File, Error> {
+        if let Some(inode) = self.inner.inode_map.get(&id) {
+            let path = inode.paths.first().unwrap();
+
+            for regex in exclude_files_regex {
+                if regex.is_match(path) {
+                    return Err(Error::SkipPrefetch { path: path.to_owned() });
+                }
+            }
+            debug!("Opening {} file {}", id.0, path);
+            OpenOptions::new()
+                .read(true)
+                .write(false)
+                .open(path)
+                .map_err(|source| Error::Open { source, path: path.to_owned() })
+        } else {
+            Err(Error::IdNoFound { id })
+        }
+    }
+
+    /// Inserts given record in RecordsFile.
+    pub fn insert_record(&mut self, records: Record) {
+        self.inner.records.push(records);
+    }
+
+    /// Inserts given InodeInfo into RecordsFile.
+    pub fn insert_or_update_inode_info(&mut self, id: FileId, info: InodeInfo) {
+        if let Some(inode) = self.inner.inode_map.get_mut(&id) {
+            if let Some(first_path) = info.paths.first() {
+                inode.paths.push(first_path.clone());
+            }
+        } else {
+            self.inner.inode_map.insert(id, info);
+        }
+    }
+
+    /// Verifies the integrity of records file.
+    ///
+    /// The check saves us from serializing an improperly built records file or replaying an
+    /// inconsistent `RecordsFile`.
+    ///
+    /// Note: the check only works on the `RecordsFile` and doesn't access the filesystem. We
+    /// limit the scope so that we avoid issuing filesystem operations (directory lookup, stats)
+    /// twice - once during check and once during replaying.
+    pub fn check(&self) -> Result<(), Error> {
+        let mut unique_files = HashSet::new();
+        let mut missing_file_ids = vec![];
+
+        for record in &self.inner.records {
+            if !self.inner.inode_map.contains_key(&record.file_id) {
+                missing_file_ids.push(record.file_id.clone());
+            }
+            unique_files.insert(record.file_id.clone());
+        }
+
+        let mut stale_inodes = vec![];
+        let mut missing_paths = vec![];
+        for (file_id, inode_info) in &self.inner.inode_map.map {
+            if inode_info.paths.is_empty() {
+                missing_paths.push(inode_info.clone());
+            }
+            if !unique_files.contains(file_id) {
+                stale_inodes.push(inode_info.clone());
+            }
+        }
+
+        if !stale_inodes.is_empty() || !missing_paths.is_empty() || !missing_file_ids.is_empty() {
+            return Err(Error::StaleInode { stale_inodes, missing_paths, missing_file_ids });
+        }
+
+        Ok(())
+    }
+
+    /// Builds InodeInfo from args and inserts inode info in RecordsFile.
+    pub fn insert_or_update_inode(&mut self, id: FileId, stat: &Metadata, path: PathString) {
+        self.insert_or_update_inode_info(
+            id,
+            InodeInfo {
+                inode_number: stat.ino(),
+                file_size: stat.len(),
+                paths: vec![path],
+                device_number: stat.dev(),
+            },
+        )
+    }
+
+    /// Serialize records in the form of csv.
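+    ///
+    /// The first row is a header, e.g.
+    /// `timestamp,file,offset,length,file_size`, followed by one row per
+    /// record, e.g. `4,/hello/1,3,5,10` (see `test_serialize_records_to_csv`
+    /// below).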
+ pub fn serialize_records_to_csv(&self, writer: &mut dyn Write) -> Result<(), Error> { + let mut wtr = csv::Writer::from_writer(writer); + + #[derive(Serialize)] + struct TempRecord<'a> { + timestamp: u64, + file: &'a PathString, + offset: u64, + length: u64, + file_size: u64, + } + + for record in &self.inner.records { + if let Some(inode_info) = self.inner.inode_map.get(&record.file_id) { + let mut inode_info = inode_info.clone(); + inode_info.paths.sort(); + + if let Some(first_path) = inode_info.paths.first().cloned() { + // Clone the &String inside Option + let record = TempRecord { + timestamp: record.timestamp, + file: &first_path, // Now you have &String + offset: record.offset, + length: record.length, + file_size: inode_info.file_size, + }; + wtr.serialize(&record) + .map_err(|e| Error::Serialize { error: e.to_string() })?; + } + } + } + wtr.flush()?; + Ok(()) + } + + fn compute_digest(&mut self) -> Result { + self.header.digest = Default::default(); + let serialized = serde_cbor::to_vec(self) + .map_err(|source| Error::Serialize { error: source.to_string() })?; + + let mut hasher = Hasher::new(); + hasher.update(&serialized); + + Ok(hasher.finalize()) + } + + /// Convenience wrapper around serialize that adds checksum/digest to the file + /// to verify file consistency during replay/deserialize. + pub fn add_checksum_and_serialize(&mut self) -> Result, Error> { + self.header.digest = self.compute_digest()?; + + serde_cbor::to_vec(self).map_err(|source| Error::Serialize { error: source.to_string() }) + } +} + +impl Default for Header { + fn default() -> Self { + Self { + major_number: MAJOR_VERSION, + minor_number: MINOR_VERSION, + date: SystemTime::now(), + digest: 0, + magic: MAGIC_UUID, + } + } +} + +// Wrapper around deserialize to check any inconsistencies in the file format. +impl<'de> Deserialize<'de> for RecordsFile { + fn deserialize(deserializer: D) -> std::result::Result + where + D: Deserializer<'de>, + { + let rf = Self::deserialize(deserializer)?; + + rf.check().map_err(|e| { + serde::de::Error::custom(format!("failed to validate records file: {}", e)) + })?; + + let mut zero_digest = rf.clone(); + zero_digest.header.digest = 0; + let digest = + zero_digest.compute_digest().map_err(|e| serde::de::Error::custom(format!("{}", e)))?; + + if digest != rf.header.digest { + return Err(serde::de::Error::custom(format!( + "file consistency check failed. Expected: {}. Found: {}", + digest, rf.header.digest + ))); + } + + Ok(rf) + } +} + +// Wrapper around serialize to check any inconsistencies in the file format before serializing +impl Serialize for RecordsFile { + fn serialize(&self, serializer: S) -> std::result::Result + where + S: Serializer, + { + self.check().map(|_| self).map_err(|e| { + serde::ser::Error::custom(format!("failed to validate records file: {}", e)) + })?; + Self::serialize(self, serializer) + } +} + +#[cfg(test)] +pub mod tests { + + use std::assert_eq; + + use super::*; + + #[test] + fn test_major_version_mismatch() { + let mut rf = RecordsFile::default(); + + rf.header.major_number += 1; + + let serialized: Result = + serde_cbor::from_slice(&serde_cbor::to_vec(&rf).unwrap()); + + assert_eq!( + serialized.unwrap_err().to_string(), + format!( + "Failed to parse major version. 
Expected: {} Found: {}", + MAJOR_VERSION, + MAJOR_VERSION + 1 + ) + ); + } + + #[test] + fn test_minor_version_mismatch() { + let mut rf = RecordsFile::default(); + + rf.header.minor_number += 1; + + let serialized: Result = + serde_cbor::from_slice(&serde_cbor::to_vec(&rf).unwrap()); + + assert_eq!( + serialized.unwrap_err().to_string(), + format!( + "Failed to parse minor version. Expected: {} Found: {}", + MINOR_VERSION, + MINOR_VERSION + 1 + ) + ); + } + + #[test] + fn deserialize_inode_info_without_path() { + let inode = InodeInfo { inode_number: 1, file_size: 10, paths: vec![], device_number: 1 }; + let serialized = serde_cbor::to_vec(&inode).unwrap(); + let deserialized: Result = + serde_cbor::from_slice(&serialized); + assert_eq!( + deserialized.unwrap_err().to_string(), + "No paths found for in InodeInfo".to_owned() + ); + } + #[test] + fn test_serialize_records_to_csv() { + let mut rf = RecordsFile::default(); + let file_count = 4; + for i in 0..file_count { + rf.insert_or_update_inode_info( + FileId(i), + InodeInfo { + inode_number: i, + file_size: i * 10, + paths: vec![format!("/hello/{}", i)], + device_number: i + 10, + }, + ) + } + for i in 0..10 { + rf.insert_record(Record { + file_id: FileId(i % file_count), + offset: i * 3, + length: i + 4, + timestamp: i * file_count, + }); + } + + let mut buf = vec![]; + rf.serialize_records_to_csv(&mut buf).unwrap(); + + let data = String::from_utf8(buf).unwrap(); + assert_eq!( + data, + "timestamp,file,offset,length,file_size\n\ + 0,/hello/0,0,4,0\n\ + 4,/hello/1,3,5,10\n\ + 8,/hello/2,6,6,20\n\ + 12,/hello/3,9,7,30\n\ + 16,/hello/0,12,8,0\n\ + 20,/hello/1,15,9,10\n\ + 24,/hello/2,18,10,20\n\ + 28,/hello/3,21,11,30\n\ + 32,/hello/0,24,12,0\n\ + 36,/hello/1,27,13,10\n" + ); + } + + fn new_record(file: u64, offset: u64, length: u64, timestamp: u64) -> Record { + Record { file_id: FileId(file), offset, length, timestamp } + } + + #[test] + fn test_coalesced_without_group() { + let non_coalescable_same_inode = + vec![new_record(1, 2, 3, 4), new_record(1, 6, 3, 5), new_record(1, 10, 3, 6)]; + assert_eq!( + coalesce_records(non_coalescable_same_inode.clone(), false), + non_coalescable_same_inode + ); + + let non_coalescable_different_inode = + vec![new_record(1, 2, 3, 4), new_record(2, 5, 3, 5), new_record(3, 8, 3, 6)]; + assert_eq!( + coalesce_records(non_coalescable_different_inode.clone(), false), + non_coalescable_different_inode + ); + + let some_coalesced = + vec![new_record(1, 2, 3, 4), new_record(1, 5, 3, 5), new_record(3, 8, 3, 6)]; + assert_eq!( + coalesce_records(some_coalesced, false), + vec![new_record(1, 2, 6, 4), new_record(3, 8, 3, 6),] + ); + + let coalesced_into_one = + vec![new_record(1, 2, 3, 4), new_record(1, 5, 3, 5), new_record(1, 8, 3, 6)]; + assert_eq!(coalesce_records(coalesced_into_one, false), vec![new_record(1, 2, 9, 4)]); + + let no_grouping_or_coalescing = + vec![new_record(1, 2, 3, 4), new_record(3, 8, 3, 5), new_record(1, 5, 3, 6)]; + assert_eq!( + coalesce_records(no_grouping_or_coalescing, false), + vec![new_record(1, 2, 3, 4), new_record(3, 8, 3, 5), new_record(1, 5, 3, 6),] + ); + } + + #[test] + fn test_coalesced_with_grouping() { + let non_coalescable_same_inode = + vec![new_record(1, 2, 3, 4), new_record(1, 6, 3, 5), new_record(1, 10, 3, 6)]; + assert_eq!( + coalesce_records(non_coalescable_same_inode.clone(), true), + non_coalescable_same_inode + ); + + let non_coalescable_different_inode = + vec![new_record(1, 2, 3, 4), new_record(2, 5, 3, 5), new_record(3, 8, 3, 6)]; + assert_eq!( + 
coalesce_records(non_coalescable_different_inode.clone(), true), + non_coalescable_different_inode + ); + + let some_coalesced = + vec![new_record(1, 2, 3, 4), new_record(1, 5, 3, 5), new_record(3, 8, 3, 6)]; + assert_eq!( + coalesce_records(some_coalesced, true), + vec![new_record(1, 2, 6, 4), new_record(3, 8, 3, 6),] + ); + + let coalesced_into_one = + vec![new_record(1, 2, 3, 4), new_record(1, 5, 3, 5), new_record(1, 8, 3, 6)]; + assert_eq!(coalesce_records(coalesced_into_one, true), vec![new_record(1, 2, 9, 4)]); + + let some_grouped_coalesced = + vec![new_record(1, 2, 3, 4), new_record(3, 8, 3, 5), new_record(1, 5, 3, 6)]; + assert_eq!( + coalesce_records(some_grouped_coalesced, true), + vec![new_record(1, 2, 6, 4), new_record(3, 8, 3, 5),] + ); + } + + #[test] + fn check_missing_records() { + let mut rf = RecordsFile::default(); + rf.inner.inode_map.insert( + FileId(0), + InodeInfo { + inode_number: 0, + file_size: 1, + paths: vec!["hello".to_owned()], + device_number: 2, + }, + ); + rf.insert_record(Record { file_id: FileId(0), offset: 10, length: 20, timestamp: 30 }); + + rf.inner.inode_map.insert( + FileId(1), + InodeInfo { + inode_number: 1, + file_size: 2, + paths: vec!["world".to_owned()], + device_number: 3, + }, + ); + let e = rf.check().unwrap_err(); + assert_eq!( + e.to_string(), + "Stale inode(s) info found.\n\ + missing_file_ids: []\n\ + stale_inodes: [\n \ + InodeInfo {\n \ + inode_number: 1,\n \ + file_size: 2,\n \ + paths: [\n \"world\",\n ],\n \ + device_number: 3,\n },\n] \n\ + missing_paths:[]" + ); + } + + #[test] + fn check_missing_file() { + let mut rf = RecordsFile::default(); + rf.inner.inode_map.insert( + FileId(0), + InodeInfo { + inode_number: 0, + file_size: 1, + paths: vec!["hello".to_owned()], + device_number: 2, + }, + ); + rf.insert_record(Record { file_id: FileId(0), offset: 10, length: 20, timestamp: 30 }); + rf.insert_record(Record { file_id: FileId(1), offset: 10, length: 20, timestamp: 30 }); + + let e = rf.check().unwrap_err(); + assert_eq!( + e.to_string(), + "Stale inode(s) info found.\n\ + missing_file_ids: [\n \ + FileId(\n 1,\n ),\n]\n\ + stale_inodes: [] \n\ + missing_paths:[]" + ); + } + + #[test] + fn check_missing_paths() { + let mut rf = RecordsFile::default(); + rf.inner.inode_map.insert( + FileId(0), + InodeInfo { inode_number: 0, file_size: 1, paths: vec![], device_number: 2 }, + ); + rf.insert_record(Record { file_id: FileId(0), offset: 10, length: 20, timestamp: 30 }); + + let e = rf.check().unwrap_err(); + assert_eq!( + e.to_string(), + "Stale inode(s) info found.\n\ + missing_file_ids: []\n\ + stale_inodes: [] \n\ + missing_paths:[\n \ + InodeInfo {\n \ + inode_number: 0,\n \ + file_size: 1,\n \ + paths: [],\n \ + device_number: 2,\n },\n]" + ); + } +} diff --git a/init/libprefetch/prefetch/src/lib.rs b/init/libprefetch/prefetch/src/lib.rs new file mode 100644 index 000000000..4b56b13ee --- /dev/null +++ b/init/libprefetch/prefetch/src/lib.rs @@ -0,0 +1,186 @@ +// Copyright (C) 2024 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +//! A library to prefetch files on the file system to optimize startup times +//! + +mod args; +mod error; +mod format; +mod replay; +mod tracer; + +use std::fs::File; +use std::fs::OpenOptions; +use std::io; +use std::io::Write; +use std::os::unix::fs::PermissionsExt; +use std::string::ToString; +use std::thread; +use std::time::Duration; + +#[cfg(target_os = "android")] +use log::Level; +#[cfg(target_os = "linux")] +use log::LevelFilter; + +pub use args::args_from_env; +use args::OutputFormat; +pub use args::ReplayArgs; +pub use args::{DumpArgs, MainArgs, RecordArgs, SubCommands}; +pub use error::Error; +pub use format::FileId; +pub use format::InodeInfo; +pub use format::Record; +pub use format::RecordsFile; +use log::info; +#[cfg(target_os = "android")] +use log::warn; +pub use replay::Replay; +pub use tracer::nanoseconds_since_boot; + +#[cfg(target_os = "android")] +use rustutils::system_properties; +#[cfg(target_os = "android")] +use rustutils::system_properties::error::PropertyWatcherError; +#[cfg(target_os = "android")] +use rustutils::system_properties::PropertyWatcher; + +#[cfg(target_os = "android")] +fn wait_for_property_true(property_name: &str) -> Result<(), PropertyWatcherError> { + let mut prop = PropertyWatcher::new(property_name)?; + loop { + prop.wait(None)?; + if system_properties::read_bool(property_name, false)? { + break; + } + } + Ok(()) +} + +/// Records prefetch data for the given configuration +pub fn record(args: &RecordArgs) -> Result<(), Error> { + let (mut tracer, exit_tx) = tracer::Tracer::create( + args.trace_buffer_size_kib, + args.tracing_subsystem.clone(), + args.tracing_instance.clone(), + args.setup_tracing, + )?; + let duration = Duration::from_secs(args.duration as u64); + + let thd = thread::spawn(move || { + if !duration.is_zero() { + info!("Record start - waiting for duration: {:?}", duration); + thread::sleep(duration); + } else { + #[cfg(target_os = "android")] + wait_for_property_true("sys.boot_completed").unwrap_or_else(|e| { + warn!("failed to wait for sys.boot_completed with error: {}", e) + }); + } + + // We want to unwrap here on failure to send this signal. Otherwise + // tracer will continue generating huge records data. + exit_tx.send(()).unwrap(); + }); + + let mut rf = tracer.trace(args.int_path.as_ref())?; + thd.join() + .map_err(|_| Error::ThreadPool { error: "Failed to join timeout thread".to_string() })?; + + let mut out_file = + OpenOptions::new().write(true).create(true).truncate(true).open(&args.path).map_err( + |source| Error::Create { source, path: args.path.to_str().unwrap().to_owned() }, + )?; + + std::fs::set_permissions(&args.path, std::fs::Permissions::from_mode(0o644)) + .map_err(|source| Error::Create { source, path: args.path.to_str().unwrap().to_owned() })?; + + out_file + .write_all(&rf.add_checksum_and_serialize()?) 
+        .map_err(|source| Error::Write { path: args.path.to_str().unwrap().to_owned(), source })?;
+    Ok(())
+}
+
+/// Replays prefetch data for the given configuration.
+pub fn replay(args: &ReplayArgs) -> Result<(), Error> {
+    let replay = Replay::new(args)?;
+    replay.replay()
+}
+
+/// Dumps prefetch data in human-readable form.
+pub fn dump(args: &DumpArgs) -> Result<(), Error> {
+    let reader = File::open(&args.path)
+        .map_err(|source| Error::Open { source, path: args.path.to_str().unwrap().to_string() })?;
+    let rf: RecordsFile =
+        serde_cbor::from_reader(reader).map_err(|e| Error::Deserialize { error: e.to_string() })?;
+    match args.format {
+        OutputFormat::Json => println!(
+            "{}",
+            serde_json::to_string_pretty(&rf)
+                .map_err(|e| Error::Serialize { error: e.to_string() })?
+        ),
+        OutputFormat::Csv => rf.serialize_records_to_csv(&mut io::stdout())?,
+    }
+    Ok(())
+}
+
+/// An alias of `log::Level` used to pass the log level across android and linux.
+#[cfg(target_os = "android")]
+pub type LogLevel = Level;
+
+/// An alias of `log::LevelFilter` used to pass the log level across android and linux.
+#[cfg(not(target_os = "android"))]
+pub type LogLevel = LevelFilter;
+
+/// Convenience logging initializer that is shared between the prefetch tool and the C wrapper
+/// library. On android the passed level is currently ignored and logging is fixed at Info.
+#[cfg(target_os = "android")]
+pub fn init_logging(_level: LogLevel) {
+    android_logger::init_once(
+        android_logger::Config::default().with_max_level(log::LevelFilter::Info).format(
+            |f, record| {
+                write!(
+                    f,
+                    "{} prefetch_rs: {}:{} {}: {}",
+                    nanoseconds_since_boot(),
+                    record.file().unwrap_or("unknown_file"),
+                    record.line().unwrap_or(0),
+                    record.level(),
+                    record.args()
+                )
+            },
+        ),
+    )
+}
+
+/// Convenience logging initializer that is shared between the prefetch tool and the C wrapper
+/// library.
+#[cfg(target_os = "linux")]
+pub fn init_logging(level: LogLevel) {
+    let mut builder = env_logger::Builder::from_default_env();
+
+    builder
+        .filter(None, level)
+        .format(|buf, record| {
+            writeln!(
+                buf,
+                "{} prefetch_rs: {}:{} {}: {}",
+                nanoseconds_since_boot(),
+                record.file().unwrap_or("unknown_file"),
+                record.line().unwrap_or(0),
+                record.level(),
+                record.args()
+            )
+        })
+        .init();
+}
diff --git a/init/libprefetch/prefetch/src/main.rs b/init/libprefetch/prefetch/src/main.rs
new file mode 100644
index 000000000..046e07eda
--- /dev/null
+++ b/init/libprefetch/prefetch/src/main.rs
@@ -0,0 +1,41 @@
+// Copyright (C) 2024 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! A utility wrapper around libprefetch that allows recording, replaying and
+//! dumping prefetch data.
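+//!
+//! Illustrative invocations (the subcommand names come from `SubCommands`
+//! below; the exact flag spellings are defined in `args.rs`, so the ones shown
+//! here are assumptions):
+//!
+//!     prefetch record --path /metadata/prefetch.rec --duration 30
+//!     prefetch replay --path /metadata/prefetch.rec --io-depth 32
+//!     prefetch dump --path /metadata/prefetch.rec --format csv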
+ +use log::error; + +use prefetch_rs::args_from_env; +use prefetch_rs::dump; +use prefetch_rs::init_logging; +use prefetch_rs::record; +use prefetch_rs::replay; +use prefetch_rs::LogLevel; +use prefetch_rs::MainArgs; +use prefetch_rs::SubCommands; + +fn main() { + init_logging(LogLevel::Debug); + let args: MainArgs = args_from_env(); + let ret = match &args.nested { + SubCommands::Record(args) => record(args), + SubCommands::Replay(args) => replay(args), + SubCommands::Dump(args) => dump(args), + }; + + if let Err(err) = ret { + error!("{:?} command failed: {:?}", args, err); + } +} diff --git a/init/libprefetch/prefetch/src/replay.rs b/init/libprefetch/prefetch/src/replay.rs new file mode 100644 index 000000000..b68d74762 --- /dev/null +++ b/init/libprefetch/prefetch/src/replay.rs @@ -0,0 +1,762 @@ +// Copyright (C) 2024 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::clone::Clone; +use std::convert::TryInto; +use std::fmt::Display; +use std::mem::replace; +use std::os::unix::io::AsRawFd; +use std::sync::Arc; +use std::sync::Mutex; +use std::sync::RwLock; +use std::thread; + +use log::debug; +use log::error; +use log::warn; +use lru_cache::LruCache; +use nix::errno::Errno; +use nix::fcntl::posix_fadvise; +use regex::Regex; + +use crate::args::ConfigFile; +use crate::format::Record; +use crate::format::{FileId, RecordsFile}; +use crate::Error; +use crate::ReplayArgs; +use libc::{c_void, off64_t, pread64}; +use std::fs::File; + +const READ_SZ: usize = 1024 * 1024; + +struct ScopedLog { + msg: T, + thd_id: usize, +} + +fn scoped_log(ctx: usize, msg: T) -> ScopedLog { + let thd_id = ctx; + debug!("{} {} start", thd_id, msg); + ScopedLog { msg, thd_id } +} + +impl Drop for ScopedLog { + fn drop(&mut self) { + debug!("{} {} end", self.thd_id, self.msg); + } +} + +fn readahead( + id: usize, + file: Arc, + record: &Record, + buffer: &mut [u8; READ_SZ], +) -> Result<(), Error> { + debug!("readahead {:?}", record); + let _dbg = scoped_log(id, "readahead"); + + let mut current_offset: off64_t = record + .offset + .try_into() + .map_err(|_| Error::Read { error: "Failed to convert offset".to_string() })?; + let mut remaining_data: usize = record + .length + .try_into() + .map_err(|_| Error::Read { error: "Failed to convert length".to_string() })?; + + while remaining_data > 0 { + let read_size = std::cmp::min(READ_SZ, remaining_data); + + // SAFETY: This is safe because + // - the file is known to exist and opened + // - buffer is allocated upfront and is guaranteed by the fact it comes from a mutable slice reference. + // - read_size is guaranteed not to exceed length of the buffer. 
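+        // Note: per the change description, replay uses pread64() into a
+        // scratch buffer rather than the readahead() syscall; the read itself
+        // populates the page cache and the buffer contents are discarded. A
+        // rough safe-Rust equivalent (sketch) would be:
+        //
+        //     use std::os::unix::fs::FileExt;
+        //     let bytes_read = file.read_at(&mut buffer[..read_size], current_offset as u64)?;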
+ let bytes_read = unsafe { + pread64(file.as_raw_fd(), buffer.as_mut_ptr() as *mut c_void, read_size, current_offset) + }; + + if bytes_read == -1 { + return Err(Error::Read { error: format!("readahead failed: {}", Errno::last_raw()) }); + } + + if bytes_read == 0 { + break; // End of file reached + } + + current_offset += bytes_read as off64_t; + remaining_data -= bytes_read as usize; + } + + // TODO: Try readahead() syscall or async I/O + Ok(()) +} + +fn worker_internal( + id: usize, + state: Arc>, + records_file: Arc>, + exit_on_error: bool, + exclude_files_regex: Vec, + buffer: &mut [u8], +) -> Result<(), Error> { + loop { + let index = { + let mut state = state.lock().unwrap(); + if state.result.is_err() { + return Ok(()); + } + state.next_record() + }; + + let record = { + let rf = records_file.read().unwrap(); + if index >= rf.inner.records.len() { + return Ok(()); + } + rf.inner.records.get(index).unwrap().clone() + }; + + let _dbg = scoped_log(id, "record_replay"); + + let file = state.lock().unwrap().fds.get_mut(&record.file_id).map(|f| f.clone()); + + let file = match file { + Some(file) => file, + None => { + let file = Arc::new({ + let file = records_file + .read() + .unwrap() + .open_file(record.file_id.clone(), &exclude_files_regex); + if let Err(e) = file { + if exit_on_error { + return Err(e); + } else { + match e { + Error::SkipPrefetch { path } => { + debug!("Skipping file during replay: {}", path); + } + _ => error!( + "Failed to open file id: {} with {}", + record.file_id.clone(), + e.to_string() + ), + } + continue; + } + } + + let file = file.unwrap(); + // We do not want the filesystem be intelligent and prefetch more than what this + // code is reading. So turn off prefetch. + + if let Err(e) = posix_fadvise( + file.as_raw_fd(), + 0, + 0, + nix::fcntl::PosixFadviseAdvice::POSIX_FADV_RANDOM, + ) { + warn!( + "Failed to turn off filesystem read ahead for file id: {} with {}", + record.file_id.clone(), + e.to_string() + ); + } + file + }); + let cache_file = file.clone(); + state.lock().unwrap().fds.insert(record.file_id.clone(), cache_file); + file + } + }; + if let Err(e) = readahead(id, file, &record, buffer.try_into().unwrap()) { + if exit_on_error { + return Err(e); + } else { + error!( + "readahead failed on file id: {} with: {}", + record.file_id.clone(), + e.to_string() + ); + continue; + } + } + } +} + +fn worker( + id: usize, + state: Arc>, + records_file: Arc>, + exit_on_error: bool, + exclude_files_regex: Vec, + buffer: &mut [u8], +) { + let _dbg = scoped_log(id, "read_loop"); + let result = worker_internal( + id, + state.clone(), + records_file, + exit_on_error, + exclude_files_regex, + buffer, + ); + if result.is_err() { + error!("worker failed with {:?}", result); + let mut state = state.lock().unwrap(); + if state.result.is_ok() { + state.result = result; + } + } +} + +#[derive(Debug)] +pub struct SharedState { + fds: LruCache>, + records_index: usize, + result: Result<(), Error>, +} + +impl SharedState { + fn next_record(&mut self) -> usize { + let ret = self.records_index; + self.records_index += 1; + ret + } +} + +/// Runtime, in-memory, representation of records file structure. +#[derive(Debug)] +pub struct Replay { + records_file: Arc>, + io_depth: u16, + exit_on_error: bool, + state: Arc>, + exclude_files_regex: Vec, +} + +impl Replay { + /// Creates Replay from input `args`. 
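+    ///
+    /// Construction sketch; the field names follow the unit tests below and
+    /// the values are placeholders:
+    ///
+    ///     let replay = Replay::new(&ReplayArgs {
+    ///         path: "/metadata/prefetch.rec".into(),
+    ///         io_depth: 32,
+    ///         max_fds: 128,
+    ///         exit_on_error: false,
+    ///         config_path: "".into(), // empty: no exclude-regex config is read
+    ///     })?;
+    ///     replay.replay()?;
+    ///
+    /// An empty `config_path` skips reading the JSON exclude list.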
+    pub fn new(args: &ReplayArgs) -> Result<Self, Error> {
+        let _dbg = scoped_log(1, "new");
+        let reader: File = File::open(&args.path).map_err(|source| Error::Open {
+            source,
+            path: args.path.to_str().unwrap().to_owned(),
+        })?;
+        let rf: RecordsFile = serde_cbor::from_reader(reader)
+            .map_err(|error| Error::Deserialize { error: error.to_string() })?;
+
+        let mut exclude_files_regex: Vec<Regex> = Vec::new();
+        // The path to the configuration file is optional in the command.
+        // If the path is provided, the configuration file will be read.
+        if !args.config_path.as_os_str().is_empty() {
+            let config_reader = File::open(&args.config_path).map_err(|source| Error::Open {
+                source,
+                path: args.config_path.to_str().unwrap().to_owned(),
+            })?;
+            let cf: ConfigFile = serde_json::from_reader(config_reader)
+                .map_err(|error| Error::Deserialize { error: error.to_string() })?;
+
+            for file_to_exclude in &cf.files_to_exclude_regex {
+                exclude_files_regex.push(Regex::new(file_to_exclude).unwrap());
+            }
+        }
+
+        Ok(Self {
+            records_file: Arc::new(RwLock::new(rf)),
+            io_depth: args.io_depth,
+            exit_on_error: args.exit_on_error,
+            state: Arc::new(Mutex::new(SharedState {
+                fds: LruCache::new(args.max_fds.into()),
+                records_index: 0,
+                result: Ok(()),
+            })),
+            exclude_files_regex,
+        })
+    }
+
+    /// Replay records.
+    pub fn replay(self) -> Result<(), Error> {
+        let _dbg = scoped_log(1, "replay");
+        let mut threads = vec![];
+        for i in 0..self.io_depth {
+            let i_clone = i as usize;
+            let state = self.state.clone();
+            let records_file = self.records_file.clone();
+            let exit_on_error = self.exit_on_error;
+            let exclude_files_regex = self.exclude_files_regex.clone();
+
+            let mut buffer = Box::new([0u8; READ_SZ]);
+
+            threads.push(thread::Builder::new().spawn(move || {
+                worker(
+                    i_clone,
+                    state,
+                    records_file,
+                    exit_on_error,
+                    exclude_files_regex,
+                    buffer.as_mut_slice(),
+                )
+            }));
+        }
+        for thread in threads {
+            thread.unwrap().join().unwrap();
+        }
+        replace(&mut self.state.lock().unwrap().result, Ok(()))
+    }
+}
+
+// WARNING: flaky tests.
+// In these tests we create files, invalidate their caches, and then replay.
+// We then verify that after replay the same portions of data are in memory.
+//
+// Since these tests rely on the presence or absence of data in the cache, the
+// files used by the tests should not live on a tmp filesystem, so we use a
+// relative path as the target directory. There is no guarantee that this
+// target directory is not on a tmp filesystem, but the chances are better than
+// with a target directory in tmpfs.
+//
+// Tests can be flaky if the system under test is running low on memory. The
+// tests create files using O_DIRECT so that no data is left in the file cache.
+// Though this is sufficient to avoid caching, other processes reading these
+// files (like an anti-virus) or other system processes might change the state
+// of the cache, or the filesystem may evict the file before we verify that
+// readahead worked as intended.
+#[cfg(test)] +pub mod tests { + use std::{ + assert, + io::Write, + ops::Range, + path::{Path, PathBuf}, + time::Duration, + }; + + use crate::format::DeviceNumber; + use crate::format::FsInfo; + use crate::format::InodeNumber; + use crate::nanoseconds_since_boot; + use nix::sys::mman::MapFlags; + use nix::sys::mman::ProtFlags; + use serde::Deserialize; + use serde::Serialize; + use std::collections::HashMap; + use std::fs::OpenOptions; + use std::num::NonZeroUsize; + use std::os::fd::AsFd; + use std::os::unix::fs::symlink; + use std::os::unix::fs::MetadataExt; + use std::ptr::NonNull; + use tempfile::NamedTempFile; + + use super::*; + use crate::tracer::{ + page_size, + tests::{copy_uncached_files_and_record_from, setup_test_dir}, + }; + + static MB: u64 = 1024 * 1024; + static KB: u64 = 1024; + + fn random_write(file: &mut NamedTempFile, base: u64) -> Range { + let start: u64 = base + (rand::random::() % (base / 2)) as u64; + let len: u64 = rand::random::() % (32 * KB); + let buf = vec![5; len as usize]; + nix::sys::uio::pwrite(file.as_fd(), &buf, start as i64).unwrap(); + start..(start + len) + } + + pub(crate) fn create_file( + path: Option<&Path>, + align: Option, + ) -> (NamedTempFile, Vec>) { + let mut file = if let Some(path) = path { + NamedTempFile::new_in(path).unwrap() + } else { + NamedTempFile::new().unwrap() + }; + let range1 = random_write(&mut file, 32 * KB); + let range2 = random_write(&mut file, 128 * KB); + let range3 = random_write(&mut file, 4 * MB); + if let Some(align) = align { + let orig_size = file.metadata().unwrap().len(); + let aligned_size = orig_size + (align - (orig_size % align)); + file.set_len(aligned_size).unwrap(); + } + (file, vec![range1, range2, range3]) + } + + pub(crate) fn generate_cached_files_and_record( + path: Option<&Path>, + create_symlink: bool, + align: Option, + ) -> (RecordsFile, Vec<(NamedTempFile, Vec>)>) { + let file1 = create_file(path, align); + let file2 = create_file(path, align); + let file3 = create_file(path, align); + + let mut f: RecordsFileBuilder = Default::default(); + f.add_file(file1.0.path().to_str().unwrap()); + f.add_file(file2.0.path().to_str().unwrap()); + f.add_file(file3.0.path().to_str().unwrap()); + if create_symlink { + let symlink_path = format!("{}-symlink", file1.0.path().to_str().unwrap()); + symlink(file1.0.path().file_name().unwrap(), &symlink_path).unwrap(); + + f.add_file(&symlink_path); + } + let rf = f.build().unwrap(); + (rf, vec![file1, file2, file3]) + } + + /// RecordsFileBuilder is primarily used for testing purpose. This + /// is a thin wrapper around "Record". This gives the ability + /// to test Records functionality. The flow of this test is as follows: + /// + /// 1: generate_cached_files_and_record -> This will create temporary files of different length + /// and builds the "RecordFile" format. + /// 2: For each of the file path create, a "RecordsFile" is generated. + /// a: mmap the file based on the length. + /// b: call mincore() to get the residency of pages in memory for the given + /// length. + /// c: Iterate over the buffer of pages returned by mincore(). If a page + /// is not resident in RAM, construct the "Record" structure. + /// 3: build() function will finally return a constructed Prefetch Record which + /// contains all the "Record" structure required for "Replay". + #[derive(Debug, Default, Deserialize, Serialize)] + pub struct RecordsFileBuilder { + // Temporarily holds paths of all files opened by other processes. 
+ pub(crate) paths: HashMap, + + // Read inode numbers + inode_numbers: HashMap<(DeviceNumber, InodeNumber), FileId>, + } + + impl RecordsFileBuilder { + pub fn add_file(&mut self, path: &str) { + if self.paths.contains_key(path) { + return; + } + + self.paths.insert(path.to_owned(), FileId(self.paths.len() as u64)); + } + + pub fn build(&mut self) -> Result { + let mut rf = RecordsFile::default(); + for (path, mut id) in self.paths.drain() { + let stat = Path::new(&path) + .metadata() + .map_err(|source| Error::Stat { source, path: path.clone() })?; + + rf.inner + .filesystems + .entry(stat.dev()) + .or_insert(FsInfo { block_size: stat.blksize() }); + + if let Some(orig_id) = self.inode_numbers.get(&(stat.dev(), stat.ino())) { + let inode = rf.inner.inode_map.get_mut(orig_id).unwrap(); + inode.paths.push(path.clone()); + + // There may be multiple paths for the file so from those path we may have multiple + // ids. Override the id. + id = orig_id.clone(); + } else { + self.inode_numbers.insert((stat.dev(), stat.ino()), id.clone()); + rf.insert_or_update_inode(id.clone(), &stat, path.clone()); + } + if let Some(mmap) = Mmap::create(&path, id)? { + mmap.get_records(&mut rf.inner.records)?; + } + } + Ok(rf) + } + } + + #[derive(Debug)] + pub(crate) struct Mmap { + map_addr: *mut c_void, + length: usize, + #[allow(dead_code)] + file: File, + file_id: FileId, + } + + impl Mmap { + pub fn create(path: &str, file_id: FileId) -> Result, Error> { + let file = OpenOptions::new() + .read(true) + .write(false) + .open(path) + .map_err(|source| Error::Open { source, path: path.to_owned() })?; + + let length = file + .metadata() + .map_err(|source| Error::Stat { source, path: path.to_owned() })? + .len() as usize; + + if length == 0 { + return Ok(None); + } + + // SAFETY: This is safe because + // - the length is checked for zero + // - offset is set to 0 + let map_addr = unsafe { + nix::sys::mman::mmap( + None, + NonZeroUsize::new(length).unwrap(), + ProtFlags::PROT_READ, + MapFlags::MAP_SHARED, + file.as_fd(), + 0, + ) + .map_err(|source| Error::Mmap { + error: source.to_string(), + path: path.to_owned(), + })? + }; + + Ok(Some(Self { map_addr: map_addr.as_ptr(), length, file, file_id })) + } + + /// Construct the "Record" file based on pages resident in RAM. 
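+        ///
+        /// A worked example of the mincore() pass below: for a four-page file
+        /// with residency bitmap [1, 1, 0, 1] and 4096-byte pages, each
+        /// contiguous resident run is merged, yielding two records:
+        ///
+        ///     Record { offset: 0, length: 8192, .. }
+        ///     Record { offset: 12288, length: 4096, .. }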
+ pub(crate) fn get_records(&self, records: &mut Vec) -> Result<(), Error> { + let page_size = page_size()?; + let page_count = (self.length + page_size - 1) / page_size; + let mut buf: Vec = vec![0_u8; page_count]; + // SAFETY: This is safe because + // - the file is mapped + // - buf points to a valid and sufficiently large memory region with the + // requirement of (length+PAGE_SIZE-1) / PAGE_SIZE bytes + let ret = unsafe { libc::mincore(self.map_addr, self.length, buf.as_mut_ptr()) }; + if ret < 0 { + return Err(Error::Custom { + error: format!("failed to query resident pages: {}", Errno::last_raw()), + }); + } + let mut i = 0; + + let mut offset_length: Option<(u64, u64)> = None; + for (index, resident) in buf.iter().enumerate() { + if *resident != 0 { + if let Some((_, length)) = &mut offset_length { + *length += page_size as u64; + } else { + offset_length = Some((index as u64 * page_size as u64, page_size as u64)); + } + } else if let Some((offset, length)) = offset_length { + i += 1; + records.push(Record { + file_id: self.file_id.clone(), + offset, + length, + timestamp: nanoseconds_since_boot(), + }); + + offset_length = None; + } + } + + if let Some((offset, length)) = offset_length { + i += 1; + records.push(Record { + file_id: self.file_id.clone(), + offset, + length, + timestamp: nanoseconds_since_boot(), + }); + } + debug!("records found: {} for {:?}", i, self); + + Ok(()) + } + } + + impl Drop for Mmap { + fn drop(&mut self) { + // SAFETY: This is safe because + // - addr is mapped and is multiple of page_size + let ret = unsafe { + nix::sys::mman::munmap(NonNull::new(self.map_addr).unwrap(), self.length) + }; + if let Err(e) = ret { + error!( + "failed to munmap {:p} {} with {}", + self.map_addr, + self.length, + e.to_string() + ); + } + } + } + + // Please see comment above RecordsFileBuilder. + fn rebuild_records_file(files: &[(PathBuf, Vec>)]) -> RecordsFile { + // Validate that caches are dropped + let mut f: RecordsFileBuilder = Default::default(); + for (path, _) in files { + f.add_file(path.to_str().unwrap()); + } + f.build().unwrap() + } + + fn ensure_files_not_cached(files: &mut [(PathBuf, Vec>)]) { + assert!(rebuild_records_file(files).inner.records.is_empty()); + } + + fn has_record(records: &[Record], key: &Record) -> bool { + for r in records { + if r.offset == key.offset && r.length == key.length { + return true; + } + } + false + } + + fn compare_records(old: &[Record], new: &[Record]) { + for key in new { + if !has_record(old, key) { + panic!("Failed to file {:?} in {:?}", key, old); + } + } + } + + fn create_test_config_file(files_to_exclude_regex: Vec) -> String { + let cfg = ConfigFile { files_to_exclude_regex, ..Default::default() }; + serde_json::to_string(&cfg).unwrap() + } + + // TODO: Split this into individual tests for better readability. + // b/378554334 + fn test_replay_internal( + create_symlink: bool, + exit_on_error: bool, + inject_error: bool, + exclude_all_files: bool, + empty_exclude_file_list: bool, + ) { + let page_size = page_size().unwrap() as u64; + let test_base_dir = setup_test_dir(); + let (rf, mut files) = + generate_cached_files_and_record(None, create_symlink, Some(page_size)); + + // Here "uncached_files" emulate the files after reboot when none of those files data is in cache. 
+ let (mut uncached_rf, mut uncached_files) = + copy_uncached_files_and_record_from(Path::new(&test_base_dir), &mut files, &rf); + + // Injects error(s) in the form of invalid filename + if inject_error { + if let Some(v) = uncached_rf.inner.inode_map.values_mut().next() { + for path in &mut v.paths { + path.push('-'); + } + } + } + + let mut file = NamedTempFile::new().unwrap(); + file.write_all(&uncached_rf.add_checksum_and_serialize().unwrap()).unwrap(); + let mut config_file = NamedTempFile::new().unwrap(); + + let mut files_to_exclude: Vec = Vec::new(); + if exclude_all_files { + // Exclude files from replay by adding them in config + for v in uncached_rf.inner.inode_map.values_mut() { + for path in &mut v.paths { + files_to_exclude.push(path.to_string()) + } + } + } else if empty_exclude_file_list { + files_to_exclude.extend(vec![]); + } else { + // Exclude file1 and file2 during replay + files_to_exclude.extend(vec!["file1".to_owned(), "file2".to_owned()]); + } + + // Create a config json to exclude files during replay + let config_file_contents = create_test_config_file(files_to_exclude); + config_file.write_all(config_file_contents.as_bytes()).unwrap(); + + ensure_files_not_cached(&mut uncached_files); + + let replay = Replay::new(&ReplayArgs { + path: file.path().to_owned(), + io_depth: 32, + max_fds: 128, + exit_on_error, + config_path: config_file.path().to_owned(), + }) + .unwrap(); + + let result = replay.replay(); + // Sleep a bit so that readaheads are complete. + thread::sleep(Duration::from_secs(1)); + + if exit_on_error && inject_error { + result.expect_err("Failure was expected"); + } else if exclude_all_files { + let new_rf = rebuild_records_file(&uncached_files); + assert!(new_rf.inner.records.is_empty()); + } else { + result.unwrap(); + + // At this point, we have prefetched data for uncached file bringing same set of + // data in memory as the original cached files. + // If we record prefetch data for new files, we should get same records files + // (offset and lengths) except that the file names should be different. + // This block verifies it. + // Note: `new_rf` is for uncached_files. But, [un]fortunately, those "uncached_files" + // are now cached after we replayed the records. 
+ let new_rf = rebuild_records_file(&uncached_files); + assert!(!new_rf.inner.records.is_empty()); + assert_eq!(rf.inner.inode_map.len(), new_rf.inner.inode_map.len()); + assert_eq!(rf.inner.records.len(), new_rf.inner.records.len()); + compare_records(&rf.inner.records, &new_rf.inner.records); + } + } + + #[test] + fn test_replay() { + test_replay_internal(true, false, false, false, false); + } + + #[test] + fn test_replay_strict() { + test_replay_internal(true, true, false, false, false); + } + + #[test] + fn test_replay_no_symlink() { + test_replay_internal(false, false, false, false, false); + } + + #[test] + fn test_replay_no_symlink_strict() { + test_replay_internal(false, true, false, false, false); + } + + #[test] + fn test_replay_fails_on_error() { + test_replay_internal(true, true, true, false, false); + } + + #[test] + fn test_replay_exclude_all_files() { + test_replay_internal(true, false, false, true, false); + } + + #[test] + fn test_replay_empty_exclude_files_list() { + test_replay_internal(true, false, false, false, true); + } +} diff --git a/init/libprefetch/prefetch/src/tracer/mem.rs b/init/libprefetch/prefetch/src/tracer/mem.rs new file mode 100644 index 000000000..f69ae807b --- /dev/null +++ b/init/libprefetch/prefetch/src/tracer/mem.rs @@ -0,0 +1,897 @@ +// Copyright (C) 2024 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! See top level documentation for `crate::tracer`. + +use std::collections::hash_map::Iter; +use std::fs::symlink_metadata; +use std::io::{ErrorKind, Write}; +use std::iter::Iterator; +use std::mem::take; +use std::os::unix::fs::MetadataExt; +use std::{ + collections::{HashMap, HashSet}, + fs::read_to_string, + option::Option, + path::{Path, PathBuf}, +}; + +use log::{debug, error, info, warn}; +use regex::Regex; +use serde::Deserialize; +use serde::Serialize; +use walkdir::{DirEntry, WalkDir}; + +use crate::format::{coalesce_records, FsInfo}; +use crate::tracer::{page_size, TracerConfigs}; +use crate::{ + format::{DeviceNumber, InodeNumber}, + tracer::{TraceSubsystem, EXCLUDE_PATHS}, + Error, FileId, Record, RecordsFile, +}; + +static MOUNTINFO_PATH: &str = "/proc/self/mountinfo"; + +// Trace events to enable +// Paths are relative to trace mount point +static TRACE_EVENTS: &[&str] = + &["events/filemap/mm_filemap_add_to_page_cache/enable", "tracing_on"]; + +// Filesystem types to ignore +static EXCLUDED_FILESYSTEM_TYPES: &[&str] = &[ + "binder", + "bpf", + "cgroup", + "cgroup2", + "configfs", + "devpts", + "fuse", // No emulated storage + "fusectl", + "proc", + "pstore", + "selinuxfs", + "sysfs", + "tmpfs", // Check for apex mount points + "tracefs", + "functionfs", // adb, fastboot + "f2fs", // Skip /data mounts +]; + +#[cfg(target_os = "linux")] +type MajorMinorType = u32; +#[cfg(target_os = "android")] +type MajorMinorType = i32; + +// TODO(b/302056482): Once we uprev nix crate, we can use the function exported by the crate. 
+fn major(dev: DeviceNumber) -> MajorMinorType {
+    (((dev >> 32) & 0xffff_f000) | ((dev >> 8) & 0x0000_0fff)) as MajorMinorType
+}
+
+// TODO(b/302056482): Once we uprev the nix crate, we can use the function exported by the crate.
+fn minor(dev: DeviceNumber) -> MajorMinorType {
+    (((dev >> 12) & 0xffff_ff00) | (dev & 0x0000_00ff)) as MajorMinorType
+}
+
+// TODO(b/302056482): Once we uprev the nix crate, we can use the function exported by the crate.
+fn makedev(major: MajorMinorType, minor: MajorMinorType) -> DeviceNumber {
+    let major = major as DeviceNumber;
+    let minor = minor as DeviceNumber;
+    ((major & 0xffff_f000) << 32)
+        | ((major & 0x0000_0fff) << 8)
+        | ((minor & 0xffff_ff00) << 12)
+        | (minor & 0x0000_00ff)
+}
+
+fn build_device_number(major: &str, minor: &str) -> Result<DeviceNumber, Error> {
+    Ok(makedev(
+        major.parse::<MajorMinorType>().map_err(|e| Error::Custom {
+            error: format!("Failed to parse major number from {} with {}", major, e),
+        })?,
+        minor.parse::<MajorMinorType>().map_err(|e| Error::Custom {
+            error: format!("Failed to parse minor number from {} with {}", minor, e),
+        })?,
+    ))
+}
+
+// Returns timestamp in nanoseconds
+fn build_timestamp(seconds: &str, microseconds: &str) -> Result<u64, Error> {
+    let seconds = seconds.parse::<u64>().map_err(|e| Error::Custom {
+        error: format!("Failed to parse seconds from {} with {}", seconds, e),
+    })?;
+    let microseconds = microseconds.parse::<u64>().map_err(|e| Error::Custom {
+        error: format!("Failed to parse microseconds from {} with {}", microseconds, e),
+    })?;
+    Ok((seconds * 1_000_000_000) + (microseconds * 1_000))
+}
+
+#[cfg(not(target_os = "android"))]
+fn is_highly_privileged_path(_path: &Path) -> bool {
+    false
+}
+
+#[cfg(target_os = "android")]
+fn is_highly_privileged_path(path: &Path) -> bool {
+    // The following directories contain a mix of files with and without access to stat/read.
+    // We do not completely exclude these directories, as there are still a lot of
+    // files we can issue readahead on. Some of the files on which readahead fails include
+    // - /system/bin/run-as
+    // - /data/data/com.android.storagemanager
+    // - /system/apex/com.android.art/bin/dex2oat32
+    // - /data/user/0/com.android.systemui
+    //
+    // - TODO: /system/apex: Apex files in the read-only partition may be read during boot.
+    //   However, some files may not have access. Double check the record files
+    //   to filter out the exact path.
+    let privileged_paths = [
+        "/data/data",
+        "/data/user/0",
+        "/data/user_de/0",
+        "/system/bin/",
+        "/system/etc/selinux/",
+        "/system/system_ext/etc/selinux/",
+        "/system/product/etc/selinux/",
+        "/system/vendor/etc/selinux/",
+        "/system_ext/etc/selinux/",
+        "/product/etc/selinux/",
+        "/vendor/etc/selinux/",
+        "/system/xbin",
+        "/system/etc/",
+        "/data/",
+        "/postinstall/",
+        "/mnt/",
+        "/metadata/",
+    ];
+    for privileged in privileged_paths {
+        if path.to_str().unwrap().starts_with(privileged) {
+            return true;
+        }
+    }
+    false
+}
+
+enum DeviceState {
+    Include((DeviceNumber, PathBuf)),
+    Exclude(DeviceNumber),
+}
+
+/// Utility struct that helps to include and exclude devices and mount points that do and don't
+/// need prefetching.
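+///
+/// Illustrative /proc/self/mountinfo lines and how `parse_line` below would
+/// classify them (sample lines, assuming one optional field before the "-"
+/// separator):
+///
+///     36 25 254:6 / / rw,relatime shared:1 - ext4 /dev/block/dm-6 rw
+///         -> included (ext4, relative path "/")
+///     40 25 0:18 / /dev rw,nosuid shared:2 - tmpfs tmpfs rw,mode=755
+///         -> excluded (tmpfs is in EXCLUDED_FILESYSTEM_TYPES)
+///
+/// Lines whose relative path field is not "/" are ignored entirely.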
+#[derive(Debug, Deserialize, Serialize)] +struct MountInfo { + // Map of device number to mount points + included_devices: HashMap, + + // Devices that we don't want to prefetch - like devices backing tempfs and sysfs + excluded_devices: HashSet, +} + +impl MountInfo { + // Parses file at `path` to build `Self`.` + fn create(path: &str) -> Result { + let buf = read_to_string(path) + .map_err(|e| Error::Read { error: format!("Reading {} failed with: {}", path, e) })?; + Self::with_buf(&buf) + } + + // Parses string in `buf` to build `Self`. + fn with_buf(buf: &str) -> Result { + let regex = Self::get_regex()?; + let mut included_devices: HashMap = HashMap::new(); + let mut excluded_devices = HashSet::new(); + let excluded_filesystem_types: HashSet = + EXCLUDED_FILESYSTEM_TYPES.iter().map(|s| String::from(*s)).collect(); + for line in buf.lines() { + if let Some(state) = Self::parse_line(®ex, &excluded_filesystem_types, line)? { + match state { + DeviceState::Include((device, path)) => { + included_devices.insert(device, path); + } + DeviceState::Exclude(device) => { + excluded_devices.insert(device); + } + } + } + } + + Ok(Self { included_devices, excluded_devices }) + } + + fn parse_line( + re: &Regex, + excluded_filesystem_types: &HashSet, + line: &str, + ) -> Result, Error> { + let caps = match re.captures(line) { + Some(caps) => caps, + None => { + return Ok(None); + } + }; + if &caps["relative_path"] != "/" { + return Ok(None); + } + + let mount_point = &caps["mount_point"]; + let mnt_pnt_with_slash = format!("{}/", mount_point); + let device_number = build_device_number(&caps["major"], &caps["minor"])?; + let fs_type = &caps["fs_type"]; + + if excluded_filesystem_types.contains(fs_type) { + info!( + "excluding fs type: {} for {} mount-point {} slash {}", + fs_type, line, mount_point, mnt_pnt_with_slash + ); + return Ok(Some(DeviceState::Exclude(device_number))); + } + + for excluded in EXCLUDE_PATHS { + if mnt_pnt_with_slash.starts_with(excluded) { + info!( + "exclude-paths fs type: {} for {} mount-point {} slash {}", + fs_type, line, mount_point, mnt_pnt_with_slash + ); + return Ok(Some(DeviceState::Exclude(device_number))); + } + } + + Ok(Some(DeviceState::Include((device_number, PathBuf::from(mount_point))))) + } + + fn get_regex() -> Result { + Regex::new(concat!( + r"^\s*(?P\S+)", + r"\s+(?P\S+)", + r"\s+(?P[0-9]+):(?P[0-9]+)", + r"\s+(?P\S+)", + r"\s+(?P\S+)", + r"\s+(?P\S+)", + r"\s+(?P\S+)", + r"\s+\S+", + r"\s+(?P\S+)", + r"\s+(?P\S+)" + )) + .map_err(|e| Error::Custom { + error: format!("create regex for parsing mountinfo failed with: {}", e), + }) + } + + fn is_excluded(&self, device: &DeviceNumber) -> bool { + self.excluded_devices.contains(device) + } + + fn get_included(&self) -> Iter { + self.included_devices.iter() + } +} + +#[derive(Default, PartialEq, Debug, Eq, Hash)] +struct TraceLineInfo { + device: DeviceNumber, + inode: InodeNumber, + offset: u64, + timestamp: u64, +} + +impl TraceLineInfo { + pub fn from_trace_line(re: &Regex, line: &str) -> Result, Error> { + let caps = match re.captures(line) { + Some(caps) => caps, + None => return Ok(None), + }; + let major = &caps["major"]; + let minor = &caps["minor"]; + let ino = &caps["ino"]; + let offset = &caps["offset"]; + let timestamp = build_timestamp(&caps["seconds"], &caps["microseconds"])?; + Ok(Some(TraceLineInfo { + device: build_device_number(major, minor)?, + inode: u64::from_str_radix(ino, 16).map_err(|e| Error::Custom { + error: format!("failed parsing inode: {} : {}", ino, e), + })?, + offset: 
offset.parse::().map_err(|e| Error::Custom { + error: format!("failed parsing offset: {} : {}", offset, e), + })?, + timestamp, + })) + } + + #[cfg(test)] + pub fn from_fields( + major: MajorMinorType, + minor: MajorMinorType, + inode: u64, + offset: u64, + timestamp: u64, + ) -> Self { + Self { device: makedev(major, minor), inode, offset, timestamp } + } + + // Convenience function to create regex. Used once per life of `record` but multiple times in + // case of tests. + pub fn get_trace_line_regex() -> Result { + // TODO: Fix this Regex expression for 5.15 kernels. This expression + // works only on 6.1+. Prior to 6.1, "" was present in the output. + Regex::new(concat!( + r"^\s+(?P\S+)", + r"\s+(?P\S+)", + r"\s+(?P\S+)", + r"\s+(?P[0-9]+)\.(?P[0-9]+):", + r"\s+mm_filemap_add_to_page_cache:", + r"\s+dev\s+(?P[0-9]+):(?P[0-9]+)", + r"\s+ino\s+(?P\S+)", + //r"\s+(?P\S+)", + r"\s+(?P\S+)", + r"\s+ofs=(?P[0-9]+)" + )) + .map_err(|e| Error::Custom { + error: format!("create regex for tracing failed with: {}", e), + }) + } +} + +#[derive(Debug, Serialize, Deserialize)] +struct MissingFile { + major_no: MajorMinorType, + minor_no: MajorMinorType, + inode: InodeNumber, + records: Vec, +} + +#[derive(Debug, Default, Deserialize, Serialize)] +struct DebugInfo { + // Check all inodes for which paths don't exists. These are the files which + // * got deleted before we got to them + // * are filesystem internal files that fs access only via inode numbers. + missing_files: HashMap, + + // Number of bytes read that belongs to directory type inodes. + directory_read_bytes: u64, + + // Number of bytes read from files for which we could not find a path in + // the filesystems. + missing_path_bytes: u64, + + // Paths for which the current process doesn't have read permission. + privileged_paths: Vec, +} + +#[derive(Debug, Serialize)] +pub(crate) struct MemTraceSubsystem { + device_inode_map: HashMap>, + // Count of all InodeNumber held by `device_inode_map`. This is handy to assign unique + // FileId. + inode_count: u64, + + // `Record`s built from parsing read trace lines. + records: Vec, + + // Regex to parse lines from trace_pipe. + #[serde(skip_serializing)] + regex: Regex, + + // Mounted devices/filesystems either at the time of parsing trace file or at the time + // of building RecordsFile from parsed lines. + mount_info: MountInfo, + + // A copy of TracerConfigs + tracer_configs: Option, + + // system page size stored to avoid frequent syscall to get the page size. + page_size: u64, + + // The fields of the debug_info are populated when build_records_file is called (after lines + // are parsed from the trace file/pipe). + debug_info: DebugInfo, +} + +impl MemTraceSubsystem { + pub fn update_configs(configs: &mut TracerConfigs) { + for path in EXCLUDE_PATHS { + configs.excluded_paths.push(path.to_owned().to_string()); + } + + for event in TRACE_EVENTS { + configs.trace_events.push(event.to_owned().to_string()); + } + configs.mountinfo_path = Some(MOUNTINFO_PATH.to_string()); + } + + pub fn create_with_configs(tracer_configs: TracerConfigs) -> Result { + static INITIAL_RECORDS_CAPACITY: usize = 100_000; + debug!("TracerConfig: {:#?}", tracer_configs); + + let regex = TraceLineInfo::get_trace_line_regex()?; + let mount_info = MountInfo::create(tracer_configs.mountinfo_path.as_ref().unwrap())?; + debug!("mountinfo: {:#?}", mount_info); + + Ok(Self { + device_inode_map: HashMap::new(), + inode_count: 0, + // For one product of android, we see around 50k records. 
To avoid a lot allocations + // and copying of records, we create a vec of this size. + // + // We do this to reduces chances of losing data, however unlikely, coming over + // `trace_pipe`. + // + // Note: Once we are done reading trace lines, we are less pedantic about allocations + // and mem copies. + records: Vec::with_capacity(INITIAL_RECORDS_CAPACITY), + regex, + mount_info, + tracer_configs: Some(tracer_configs), + page_size: page_size()? as u64, + debug_info: DebugInfo { + missing_files: HashMap::new(), + directory_read_bytes: 0, + missing_path_bytes: 0, + privileged_paths: vec![], + }, + }) + } + + fn new_file_id(&mut self) -> FileId { + let id = self.inode_count; + self.inode_count += 1; + FileId(id) + } + + fn get_trace_info(&self, line: &str) -> Result, Error> { + TraceLineInfo::from_trace_line(&self.regex, line) + } + + // Returns true if the file or directory is on a device which is excluded from walking. + // If the path was excluded because the current process doesn't have privileged to read it, + // the path gets added to `privileged` list. + fn is_excluded(&self, entry: &DirEntry, device: u64, privileged: &mut Vec) -> bool { + // We skip paths that are reside on excluded devices here. This is ok because a + // non-excluded mount point will have a separate entry in MountInfo. For example + // - `/` has ext4 + // - `/tmp` has tempfs + // - `/tmp/mnt` has ext4 that we are interested in. + // MountInfo will have three entries - `/`, `/tmp/` and `/tmp/mnt`. Skipping walking + // `/tmp` while walking `/` is ok as next `mount_info.get_included()` will return + // `/tmp/mnt` path. + // + // + // We skip links here as they can refer to mount points across + // filesystems. If that path is valid and access are valid, then + // we should have entry by the file's pair. + // + // + // We skip devices that don't match current walking device because we eventually + // walk other devices. + match symlink_metadata(entry.path()) { + Ok(lstat) => { + if self.mount_info.is_excluded(&lstat.dev()) + || lstat.dev() != device + || lstat.file_type().is_symlink() + { + return true; + } + } + Err(e) => { + error!("stat on {} failed with {}", entry.path().to_str().unwrap(), e); + + // We treat EACCES special because on some platforms, like android, process needs to + // have very special set of permissions to access some inodes. + // We ignore errors in such cases *after* making an effort to get to them. + if e.kind() == ErrorKind::PermissionDenied + && is_highly_privileged_path(entry.path()) + { + privileged.push(entry.path().to_owned()); + return true; + } + } + } + + // On error, we return false because if lstat has failed, it will fail following operations + // including stat. + false + } +} + +impl TraceSubsystem for MemTraceSubsystem { + fn add_line(&mut self, line: &str) -> Result<(), Error> { + if let Some(info) = self.get_trace_info(line)? { + if self.mount_info.is_excluded(&info.device) { + return Ok(()); + } + + self.device_inode_map.entry(info.device).or_default(); + + let file_id = if let Some(id) = + self.device_inode_map.get_mut(&info.device).unwrap().get(&info.inode) + { + id.clone() + } else { + self.new_file_id() + }; + self.device_inode_map + .get_mut(&info.device) + .unwrap() + .insert(info.inode, file_id.clone()); + + self.records.push(Record { + file_id, + offset: info.offset, + length: self.page_size, + timestamp: info.timestamp, + }); + } + + Ok(()) + } + + fn build_records_file(&mut self) -> Result { + // reset debug_info in case build_records_file was called twice. 
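+        // Overall flow from here: reload mountinfo, walk each included
+        // filesystem to map traced inodes back to paths, divert records that
+        // hit directories or path-less inodes into debug_info, and coalesce
+        // the rest into the returned RecordsFile.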
+ self.debug_info = DebugInfo::default(); + let mut rf = RecordsFile::default(); + let mut directories = HashSet::new(); + + // TODO(b/302194377): We are holding all privileged_paths in this variable and then + // transferring it to `self.debug_info.privileged_paths` later. We can avoid this step + // if we directly update `self.debug_info.privileged_paths`. To do so, we need to refactor + // code to make borrow not complain at several places - ex. immutably borrowing + // `self.mount_info` in outer loop and then mutably borrowing + // `self.debug_info.privileged_paths`. + let mut privileged_paths = vec![]; + + // Reload mount_info. When we created mount_info for the first time, maybe + // the system was in early boot phase. Reload the mount_info so as to get + // current/new mount points. + if let Some(tracer_config) = &self.tracer_configs { + self.mount_info = MountInfo::create(tracer_config.mountinfo_path.as_ref().unwrap())?; + debug!("reloaded mountinfo: {:#?}", self.mount_info); + } + + for (device, root_path) in self.mount_info.get_included() { + let inode_map = if let Some(map) = self.device_inode_map.get(device) { + map + } else { + continue; + }; + + if inode_map.is_empty() { + return Err(Error::Custom { + error: format!("Unexpected empty records for {:?}", root_path), + }); + } + + let mut block_size = 0; + let walker = WalkDir::new(root_path).into_iter(); + + for entry in + walker.filter_entry(|e| !self.is_excluded(e, *device, &mut privileged_paths)) + { + let path = match entry { + Ok(entry) => entry.path().to_owned(), + Err(e) => { + error!("walking directory failed: {} {}", root_path.to_str().unwrap(), e); + continue; + } + }; + + let stat = match path.metadata() { + Ok(stat) => stat, + Err(e) => { + error!("stat on {} failed with {}", path.to_str().unwrap(), e); + continue; + } + }; + + block_size = stat.blksize(); + + let file_id = if let Some(id) = inode_map.get(&stat.ino()) { + id.clone() + } else { + continue; + }; + + // We cannot issue a normal readahead on directories. So we skip those records that + // belong to directories. + if stat.file_type().is_dir() { + info!( + "skipping directory readahead record for file_id:{file_id} ino:{} path:{} ", + stat.ino(), + path.to_str().unwrap() + ); + directories.insert(file_id.clone()); + continue; + } + + rf.insert_or_update_inode(file_id, &stat, path.to_str().unwrap().to_owned()); + } + + rf.inner.filesystems.insert(*device, FsInfo { block_size }); + } + + self.debug_info.privileged_paths.append(&mut privileged_paths); + + for (device, inode_map) in &self.device_inode_map { + for (inode, file_id) in inode_map { + if !rf.inner.inode_map.contains_key(file_id) { + let major_no: MajorMinorType = major(*device); + let minor_no: MajorMinorType = minor(*device); + self.debug_info.missing_files.insert( + file_id.clone(), + MissingFile { major_no, minor_no, inode: *inode, records: vec![] }, + ); + } + } + } + + // Remove all records that belong to directories or for which we did not find paths. 
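+        // For example: a record whose inode resolved to a directory only adds
+        // its length to directory_read_bytes; one whose inode never got a path
+        // is stashed in debug_info.missing_files; everything else is kept and
+        // coalesced below.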
+ let mut records = vec![]; + for record in take(&mut self.records) { + if directories.contains(&record.file_id) { + self.debug_info.directory_read_bytes += record.length; + } else if let Some(missing_file) = + self.debug_info.missing_files.get_mut(&record.file_id) + { + self.debug_info.missing_path_bytes += record.length; + missing_file.records.push(record); + } else { + records.push(record); + } + } + + warn!( + "Recorded {} bytes worth of data read from directories", + self.debug_info.directory_read_bytes + ); + warn!( + "Recorded {} bytes worth of data read from files that don't have paths", + self.debug_info.missing_path_bytes + ); + + rf.inner.records = coalesce_records(records, true); + + Ok(rf) + } + + fn serialize(&self, write: &mut dyn Write) -> Result<(), Error> { + write + .write_all( + &serde_json::to_vec(&self) + .map_err(|e| Error::Serialize { error: e.to_string() })?, + ) + .map_err(|source| Error::Write { path: "intermediate file".to_owned(), source }) + } +} + +#[cfg(test)] +mod tests { + use nix::sys::stat::{major, minor}; + use std::assert_eq; + use std::path::Path; + + use crate::tracer::tests::{copy_uncached_files_and_record_from, setup_test_dir}; + + use crate::replay::tests::generate_cached_files_and_record; + + use super::*; + + static TRACE_BUFFER: &str = r#" + Settingide-502 [001] .... 484.360292: mm_filemap_add_to_page_CACHE: dev 254:6 ino cf1 page=68d477 pfn=59833 ofs=32768 + Settingide-502 [001] .... 484.360311: mm_filemap_add_to_page_cache: dev 254:6 ino cf1 page=759458 pfn=59827 ofs=57344 + BOX_ENTDED-3071 [001] .... 485.276715: mm_filemap_add_to_pag_ecache: dev 254:6 ino 1 page=00cc1c pfn=81748 ofs=13574144 + BOX_ENTDED-3071 [001] .... 485.276990: mm_filemap_add_to_page_cache: dev 254:6 ino cf2 page=36540b pfn=60952 ofs=0 + .gms.peent-843 [001] .... 485.545516: mm_filemap_add_to_page_cache: dev 254:6 ino 1 page=002e8b pfn=58928 ofs=13578240 + .gms.peent-843 [001] .... 485.545820: mm_filemap_add_to_page_cache: dev 254:6 ino cf3 page=6233ce pfn=58108 ofs=0 + an.bg-459 [001] .... 494.029396: mm_filemap_add_to_page_cache: dev 254:3 ino 7cf page=c5b5c7 pfn=373933 ofs=1310720 + an.bg-459 [001] .... 
494.029398: mm_filemap_add_to_page_cache: dev 254:3 ino 7cf page=b8b9ec pfn=410074 ofs=1314816
+    "#;
+
+    fn sample_mem_traces() -> (String, Vec<Option<TraceLineInfo>>) {
+        (
+            TRACE_BUFFER.to_owned(),
+            vec![
+                None,
+                None,
+                Some(TraceLineInfo::from_fields(254, 6, 0xcf1, 57344, 484360311000)),
+                None,
+                Some(TraceLineInfo::from_fields(254, 6, 0xcf2, 0, 485276990000)),
+                Some(TraceLineInfo::from_fields(254, 6, 0x1, 13578240, 485545516000)),
+                Some(TraceLineInfo::from_fields(254, 6, 0xcf3, 0, 485545820000)),
+                Some(TraceLineInfo::from_fields(254, 3, 0x7cf, 1310720, 494029396000)),
+                Some(TraceLineInfo::from_fields(254, 3, 0x7cf, 1314816, 494029398000)),
+                None,
+            ],
+        )
+    }
+
+    #[test]
+    fn test_parse_trace_line() {
+        let (buf, res) = sample_mem_traces();
+        let re = TraceLineInfo::get_trace_line_regex().unwrap();
+        for (index, line) in buf.lines().enumerate() {
+            let found = TraceLineInfo::from_trace_line(&re, line).unwrap();
+            let expected = res.get(index).unwrap();
+            assert_eq!(found.is_some(), expected.is_some());
+            if found.is_some() {
+                assert_eq!(found.unwrap(), *expected.as_ref().unwrap());
+            }
+        }
+    }
+
+    #[test]
+    fn test_add_line() {
+        let test_base_dir = setup_test_dir();
+        let (rf, mut files) =
+            generate_cached_files_and_record(None, true, Some(page_size().unwrap() as u64));
+        let (_uncached_rf, uncached_files) =
+            copy_uncached_files_and_record_from(Path::new(&test_base_dir), &mut files, &rf);
+        let mut mount_include = HashMap::new();
+
+        let included_dev = uncached_files.get(0).unwrap().0.metadata().unwrap().dev();
+        let included_inode1 = uncached_files.get(0).unwrap().0.metadata().unwrap().ino();
+        let included_inode2 = uncached_files.get(1).unwrap().0.metadata().unwrap().ino();
+        let included_major = major(included_dev);
+        let included_minor = minor(included_dev);
+        mount_include.insert(included_dev, std::fs::canonicalize(test_base_dir).unwrap());
+        let mut mount_exclude = HashSet::new();
+        mount_exclude.insert(0);
+
+        let mut mem_tracer = MemTraceSubsystem {
+            device_inode_map: HashMap::new(),
+            inode_count: 0,
+            records: vec![],
+            regex: TraceLineInfo::get_trace_line_regex().unwrap(),
+            mount_info: MountInfo {
+                included_devices: mount_include,
+                excluded_devices: mount_exclude,
+            },
+            tracer_configs: None,
+            page_size: page_size().unwrap() as u64,
+            debug_info: DebugInfo {
+                missing_files: HashMap::new(),
+                directory_read_bytes: 0,
+                missing_path_bytes: 0,
+                privileged_paths: vec![],
+            },
+        };
+
+        let pg_size = page_size().unwrap();
+        // Format is major, minor, inode, offset
+        let inputs = vec![
+            (0, 0, 2, 10), // to be excluded. bad device.
+            (included_major, included_minor, included_inode1, 0),
+            (included_major, included_minor, included_inode1, 3 * pg_size),
+            // duplicate read
+            (included_major, included_minor, included_inode1, 3 * pg_size),
+            (0, 0, included_inode1, 10), // to be excluded. bad device.
+            (included_major, included_minor, included_inode1, 2 * pg_size), // contiguous
+            // non-contiguous
+            (included_major, included_minor, included_inode1, 12 * pg_size),
+            // same offset different inode
+            (included_major, included_minor, included_inode2, 3 * pg_size),
+            // Contiguous offset different inode
+            (included_major, included_minor, included_inode2, pg_size),
+        ];
+
+        for (i, (major, minor, inode, offset)) in inputs.iter().enumerate() {
+            // used to timestamp the log line.
+            let seconds = i;
+            // used to timestamp the log line.
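+            // `microseconds` mirrors `seconds` so that every input row gets a distinct,
+            // monotonically increasing timestamp.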
+ let microseconds = i; + for operation in &["mm_filemap_add_to_page_cache", "some_other_operation"] { + let line = format!( + " BOX_ENTRY_ADDED-3071 [001] .... {}.{}: {}: \ + dev {}:{} ino {:x} page=00000000f936540b pfn=60952 ofs={}", + seconds, microseconds, operation, major, minor, inode, offset + ); + mem_tracer.add_line(&line).unwrap(); + } + } + assert_eq!(mem_tracer.records.len(), 7); + assert_eq!(mem_tracer.device_inode_map.len(), 1); + assert_eq!(mem_tracer.device_inode_map.get(&included_dev).unwrap().len(), 2); + assert!(mem_tracer + .device_inode_map + .get(&included_dev) + .unwrap() + .contains_key(&included_inode1)); + assert!(mem_tracer + .device_inode_map + .get(&included_dev) + .unwrap() + .contains_key(&included_inode2)); + } + + fn new_record(file: u64, offset: u64, length: u64, timestamp: u64) -> Record { + Record { file_id: FileId(file), offset, length, timestamp } + } + + #[test] + fn test_get_records_file() { + let test_base_dir = setup_test_dir(); + let (rf, mut files) = + generate_cached_files_and_record(None, true, Some(page_size().unwrap() as u64)); + let (_uncached_rf, uncached_files) = + copy_uncached_files_and_record_from(Path::new(&test_base_dir), &mut files, &rf); + let mut mount_include = HashMap::new(); + + let included_dev = uncached_files.get(0).unwrap().0.metadata().unwrap().dev(); + let included_inode1 = uncached_files.get(0).unwrap().0.metadata().unwrap().ino(); + let included_inode2 = uncached_files.get(1).unwrap().0.metadata().unwrap().ino(); + let included_major = major(included_dev); + let included_minor = minor(included_dev); + mount_include.insert(included_dev, std::fs::canonicalize(test_base_dir).unwrap()); + let mut mount_exclude = HashSet::new(); + mount_exclude.insert(0); + + let mut mem_tracer = MemTraceSubsystem { + device_inode_map: HashMap::new(), + inode_count: 0, + records: vec![], + regex: TraceLineInfo::get_trace_line_regex().unwrap(), + mount_info: MountInfo { + included_devices: mount_include, + excluded_devices: mount_exclude, + }, + tracer_configs: None, + page_size: page_size().unwrap() as u64, + debug_info: DebugInfo { + missing_files: HashMap::new(), + directory_read_bytes: 0, + missing_path_bytes: 0, + privileged_paths: vec![], + }, + }; + + let pg_size = page_size().unwrap() as u64; + // Format is major, minor, inode, offset + let inputs = vec![ + (0, 0, 2, 10), // to be excluded. bad device. + (included_major, included_minor, included_inode1, 0), + (included_major, included_minor, included_inode1, 3 * pg_size), + // duplicate read + (included_major, included_minor, included_inode1, 3 * pg_size), + (0, 0, included_inode1, 10), // to be excluded. bad device. + (included_major, included_minor, included_inode1, 2 * pg_size), // contiguous + // non-contiguous + (included_major, included_minor, included_inode1, 12 * pg_size), + // same offset different inode + (included_major, included_minor, included_inode2, 3 * pg_size), + // Contiguous offset different inode + (included_major, included_minor, included_inode2, pg_size), + ]; + + for (i, (major, minor, inode, offset)) in inputs.iter().enumerate() { + // used to timestamp the log line. + let seconds = i; + // used to timestamp the log line. + let microseconds = i; + for operation in &["mm_filemap_add_to_page_cache", "some_other_operation"] { + let line = format!( + " BOX_ENTRY_ADDED-3071 [001] .... 
{}.{}: {}: \
+                     dev {}:{} ino {:x} page=00000000f936540b pfn=60952 ofs={}",
+                    seconds, microseconds, operation, major, minor, inode, offset
+                );
+                mem_tracer.add_line(&line).unwrap();
+            }
+        }
+        let rf = mem_tracer.build_records_file().unwrap();
+        assert_eq!(
+            rf.inner.records,
+            vec![
+                new_record(0, 0, pg_size, 1000001000),
+                new_record(0, 2 * pg_size, 2 * pg_size, 2000002000),
+                new_record(0, 12 * pg_size, pg_size, 6000006000),
+                new_record(1, pg_size, pg_size, 8000008000),
+                new_record(1, 3 * pg_size, pg_size, 7000007000),
+            ]
+        );
+    }
+}
diff --git a/init/libprefetch/prefetch/src/tracer/mod.rs b/init/libprefetch/prefetch/src/tracer/mod.rs
new file mode 100644
index 000000000..0f1611675
--- /dev/null
+++ b/init/libprefetch/prefetch/src/tracer/mod.rs
@@ -0,0 +1,965 @@
+// Copyright (C) 2024 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Tracer supports collecting information based on two different tracing
+//! subsystems within `/sys/kernel/tracing`.
+//!
+//! ## Mem
+//! Mem is the preferred tracer.
+//! ### Phase 1:
+//! This phase relies on a trace event at
+//! "events/filemap/mm_filemap_add_to_page_cache". When enabled, the event logs
+//! a message that contains the device id, inode number, and offset of the page
+//! that is being read. The tracer makes a note of this.
+//!
+//! ### Phase 2:
+//! When the recording of events is done, the tracer gets all the mount points
+//! for which a device id was recorded. Once it knows the mount points, it looks
+//! up file paths for the inode numbers that it recorded. The paths, offsets and
+//! lengths are then stored in the records file.
+//!
+//! Phase 2 is very IO intensive, as the entire filesystem is walked to find
+//! paths for the different inodes.
+//!
+pub(crate) mod mem;
+
+use std::{
+    boxed::Box,
+    collections::HashSet,
+    fs::{create_dir, read_to_string, rename, File, OpenOptions},
+    io::{BufRead, BufReader, Read, Write},
+    path::{Path, PathBuf},
+    string::ToString,
+    sync::mpsc::{self, Receiver, Sender},
+};
+
+use log::{error, info};
+use nix::time::ClockId;
+use serde::Deserialize;
+use serde::Serialize;
+
+use crate::error::Error;
+use crate::{args::TracerType, format::RecordsFile};
+use mem::MemTraceSubsystem;
+
+pub(crate) static EXCLUDE_PATHS: &[&str] =
+    &["/dev/", "/proc/", "/sys/", "/tmp/", "/run/", "/config/", "/mnt/", "/storage/"];
+
+/// During the record phase, prefetch may modify files under `/sys/kernel/tracing/` to
+/// - change the trace buffer size so that we don't lose trace events
+/// - enable a few trace events
+/// - enable tracing
+///
+/// The old values are restored at the end of the record phase.
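+// A hypothetical record session (paths depend on the tracing instance in use)
+// would look roughly like:
+//
+//     let base = Path::new("/sys/kernel/tracing");
+//     let _buf = TraceEventFile::write(base.join("buffer_size_kb"), "8192")?;
+//     let _evt = TraceEventFile::enable(
+//         base.join("events/filemap/mm_filemap_add_to_page_cache/enable"))?;
+//     // Dropping `_buf`/`_evt` restores the original contents.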
+#[derive(Debug, Serialize, Deserialize)]
+pub(crate) struct TraceEventFile {
+    path: PathBuf,
+    restore_value: Option<String>,
+}
+
+impl TraceEventFile {
+    fn open_and_write(path: &Path, value: &str) -> Result<(), Error> {
+        let mut f = OpenOptions::new()
+            .write(true)
+            .read(true)
+            .open(path)
+            .map_err(|e| Error::Open { source: e, path: path.to_str().unwrap().to_string() })?;
+        f.write_all(value.as_bytes())
+            .map_err(|e| Error::Write { path: path.to_str().unwrap().to_owned(), source: e })
+    }
+
+    pub fn write(path: PathBuf, value: &str) -> Result<Self, Error> {
+        let restore_value = read_to_string(&path).map_err(|s| Error::Read {
+            error: format!("Reading {} failed: {}", path.to_str().unwrap(), s),
+        })?;
+
+        Self::open_and_write(&path, value)?;
+
+        info!(
+            "Changed contents of {} from {:?} to {}",
+            path.to_str().unwrap(),
+            restore_value,
+            value
+        );
+        Ok(Self { path, restore_value: Some(restore_value) })
+    }
+
+    pub fn enable(path: PathBuf) -> Result<Self, Error> {
+        Self::write(path, "1")
+    }
+
+    pub fn restore(&self) -> Result<(), Error> {
+        if let Some(restore_value) = &self.restore_value {
+            Self::open_and_write(&self.path, restore_value)
+        } else {
+            Ok(())
+        }
+    }
+}
+
+impl Drop for TraceEventFile {
+    fn drop(&mut self) {
+        if let Err(ret) = self.restore() {
+            error!(
+                "Failed to restore state of file {:?} with value: {:?}. Error: {}",
+                self.path,
+                self.restore_value,
+                ret.to_string()
+            );
+        }
+    }
+}
+
+#[derive(Debug, Deserialize, Serialize)]
+pub(crate) struct TracerConfigs {
+    pub excluded_paths: Vec<String>,
+    pub buffer_size_file_path: String,
+    pub trace_base_path: PathBuf,
+    pub trace_events: Vec<String>,
+    pub mountinfo_path: Option<String>,
+    pub trace_operations: HashSet<String>,
+    // We never read back these fields. The only use for holding these around is to
+    // restore state at the end of the run.
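+    // Dropping them (when `TracerConfigs` goes away) triggers `TraceEventFile::restore`
+    // through its `Drop` impl.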
+    #[allow(dead_code)]
+    trace_files: Vec<TraceEventFile>,
+}
+
+impl TracerConfigs {
+    pub fn new(
+        kb_buffer_size: Option<u64>,
+        setup_tracing: bool,
+        tracer_type: TracerType,
+        trace_mount_point: Option<String>,
+        tracing_instance: Option<String>,
+    ) -> Result<Self, Error> {
+        static TRACE_MOUNT_POINT: &str = "/sys/kernel/tracing";
+
+        // Trace buffer size file relative to trace mount point
+        static TRACE_BUFFER_SIZE_FILE: &str = "buffer_size_kb";
+
+        let trace_mount_point = trace_mount_point.unwrap_or_else(|| TRACE_MOUNT_POINT.to_owned());
+        let trace_base_path = if let Some(instance) = tracing_instance {
+            Path::new(&trace_mount_point).join("instances").join(instance)
+        } else {
+            Path::new(&trace_mount_point).to_owned()
+        };
+
+        if setup_tracing && !trace_base_path.exists() {
+            create_dir(&trace_base_path).map_err(|e| Error::Create {
+                source: e,
+                path: trace_base_path.to_str().unwrap().to_owned(),
+            })?;
+        }
+
+        if !trace_base_path.exists() {
+            return Err(Error::Custom {
+                error: format!(
+                    "trace mount point doesn't exist: {}",
+                    trace_base_path.to_str().unwrap().to_owned()
+                ),
+            });
+        }
+
+        let mut configs = TracerConfigs {
+            excluded_paths: vec![],
+            buffer_size_file_path: TRACE_BUFFER_SIZE_FILE.to_owned(),
+            trace_base_path,
+            trace_events: vec![],
+            mountinfo_path: None,
+            trace_operations: HashSet::new(),
+            trace_files: vec![],
+        };
+
+        match tracer_type {
+            TracerType::Mem => MemTraceSubsystem::update_configs(&mut configs),
+        }
+
+        if setup_tracing {
+            let trace_base_dir = Path::new(&configs.trace_base_path);
+            if let Some(kb_buffer_size) = kb_buffer_size {
+                configs.trace_files.push(TraceEventFile::write(
+                    trace_base_dir.join(&configs.buffer_size_file_path),
+                    &kb_buffer_size.to_string(),
+                )?);
+            }
+            for path in &configs.trace_events {
+                configs.trace_files.push(TraceEventFile::enable(trace_base_dir.join(path))?);
+            }
+        }
+
+        Ok(configs)
+    }
+}
+
+/// Returns time, in nanoseconds, since boot
+pub fn nanoseconds_since_boot() -> u64 {
+    if let Ok(t) = nix::time::clock_gettime(ClockId::CLOCK_MONOTONIC) {
+        ((t.tv_sec() * 1_000_000_000) + t.tv_nsec()) as u64
+    } else {
+        0
+    }
+}
+
+pub(crate) trait TraceSubsystem {
+    /// This routine is called whenever there is a new line available to be parsed.
+    /// The implementation will typically parse the line and retain the data in memory.
+    /// Implementors are not expected to do heavy-lifting tasks, like IO, in this context.
+    fn add_line(&mut self, line: &str) -> Result<(), Error>;
+
+    /// Generates a records file from all the collected data.
+    /// In this context, the implementors might process data by issuing queries to filesystems.
+    fn build_records_file(&mut self) -> Result<RecordsFile, Error>;
+
+    /// This helps us serialize the internal state of the tracing subsystem during the record
+    /// phase. This allows us to get raw data for analysis of read patterns and for debugging
+    /// in situations where we might not have access to the system yet (ex. early boot phase).
+    fn serialize(&self, writer: &mut dyn Write) -> Result<(), Error>;
+}
+
+/// Returns page size in bytes
+pub(crate) fn page_size() -> Result<usize, Error> {
+    Ok(nix::unistd::sysconf(nix::unistd::SysconfVar::PAGE_SIZE)
+        .map_err(|e| Error::Custom { error: format!("failed to query page size: {}", e) })?
+        .ok_or(Error::Custom { error: "failed to query page size: None returned".to_string() })?
+        as usize)
+}
+
+pub struct Tracer {
+    // Open handle to the static trace buffer file, which is usually located at
+    // `/sys/kernel/tracing/trace`.
+    // See comment on top of `trace` function.
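+    // Reading this file before tailing the pipe captures events that fired before
+    // the pipe was opened.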
+    trace_file: BufReader<File>,
+
+    // Open handle to the trace pipe, which is usually located at
+    // `/sys/kernel/tracing/trace_pipe`.
+    // See comment on top of `trace` function.
+    trace_pipe: BufReader<File>,
+
+    // Signal to exit the infinite loop in `trace()`
+    exit_rx: Receiver<()>,
+
+    // Tracing subsystem that actually parses trace lines and builds records.
+    tracing_subsystem: Box<dyn TraceSubsystem + Send>,
+}
+
+impl Tracer {
+    pub fn create(
+        kb_buffer_size: Option<u64>,
+        tracer_type: TracerType,
+        tracing_instance: Option<String>,
+        setup_tracing: bool,
+    ) -> Result<(Self, Sender<()>), Error> {
+        /// Trace pipe path relative to trace mount point
+        static TRACE_PIPE_PATH: &str = "trace_pipe";
+
+        /// Trace file path relative to trace mount point
+        static TRACE_FILE_PATH: &str = "trace";
+
+        let configs = TracerConfigs::new(
+            kb_buffer_size,
+            setup_tracing,
+            tracer_type.clone(),
+            None,
+            tracing_instance,
+        )?;
+
+        let pipe_path = Path::new(&configs.trace_base_path).join(TRACE_PIPE_PATH);
+        let trace_pipe = File::open(&pipe_path)
+            .map_err(|e| Error::Open { source: e, path: pipe_path.to_str().unwrap().to_owned() })?;
+
+        let file_path = Path::new(&configs.trace_base_path).join(TRACE_FILE_PATH);
+        let trace_file = File::open(&file_path)
+            .map_err(|e| Error::Open { source: e, path: file_path.to_str().unwrap().to_owned() })?;
+        let tracer: Box<dyn TraceSubsystem + Send> = match tracer_type {
+            TracerType::Mem => Box::new(MemTraceSubsystem::create_with_configs(configs)?),
+        };
+
+        Self::create_with_config(trace_file, trace_pipe, tracer)
+    }
+
+    fn create_with_config(
+        file: File,
+        pipe: File,
+        tracer: Box<dyn TraceSubsystem + Send>,
+    ) -> Result<(Self, Sender<()>), Error> {
+        let (exit_tx, exit_rx) = mpsc::channel();
+        let trace_pipe = BufReader::new(pipe);
+        let trace_file = BufReader::new(file);
+
+        Ok((Self { trace_file, trace_pipe, exit_rx, tracing_subsystem: tracer }, exit_tx))
+    }
+
+    fn save_intermediate_state(&self, intermediate_file: Option<&PathBuf>) -> Result<(), Error> {
+        if let Some(int_path) = intermediate_file {
+            let mut tmp_file = int_path.clone();
+            tmp_file.set_extension("int.tmp");
+            let mut out_file = File::create(&tmp_file).map_err(|source| Error::Create {
+                source,
+                path: int_path.to_str().unwrap().to_owned(),
+            })?;
+            self.tracing_subsystem.serialize(&mut out_file)?;
+            rename(&tmp_file, int_path).map_err(|e| Error::Custom {
+                error: format!(
+                    "rename from {} to {} failed with {}",
+                    tmp_file.to_str().unwrap(),
+                    int_path.to_str().unwrap(),
+                    e
+                ),
+            })?;
+        }
+        Ok(())
+    }
+
+    /// This routine parses all the events since the last reset of the trace buffer.
+    ///
+    /// The linux tracing subsystem exposes two interfaces to get trace events from:
+    /// 1. a file - usually at `/sys/kernel/tracing/trace`
+    /// 2. a pipe - usually at `/sys/kernel/tracing/trace_pipe`
+    ///
+    /// The file is *sort of* a ring buffer which works off of a `buffer_size_kb` sized buffer.
+    /// Relying on it alone is not very efficient, as we end up getting a lot of duplicates.
+    ///
+    /// The pipe only contains new trace lines. Any trace events that occurred before the
+    /// pipe was opened are lost to it.
+    ///
+    /// IMPORTANT: The moment we start reading from the pipe, the events in the file
+    /// disappear/reset. So we should read the file entirely before we start reading the pipe.
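+    ///
+    /// In short: drain `trace` first, then tail `trace_pipe` until the caller signals
+    /// exit over the channel returned by `create`.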
+    pub fn trace(&mut self, intermediate_file: Option<&PathBuf>) -> Result<RecordsFile, Error> {
+        let mut buf = String::new();
+        self.trace_file
+            .read_to_string(&mut buf)
+            .map_err(|e| Error::Read { error: format!("failed to read trace file: {}", e) })?;
+
+        for line in buf.lines() {
+            let trimmed = line.trim_end();
+            self.tracing_subsystem.add_line(trimmed)?;
+        }
+
+        // The logic here is to block on trace_pipe forever. We break out of the loop only
+        // when we read a line from the pipe *and* we have received an event on exit_rx.
+        // This works because the system keeps issuing read syscalls, and also because,
+        // at the moment, we use prefetch on build systems, and not in production, to
+        // generate the records file.
+        //
+        // TODO(b/302045304): async read trace_pipe.
+        while self.exit_rx.try_recv().is_err() {
+            let mut line = String::new();
+            let len = self
+                .trace_pipe
+                .read_line(&mut line)
+                .map_err(|e| Error::Read { error: e.to_string() })?;
+            let trimmed = line.trim_end();
+            if len == 0 {
+                // We should never read a zero-length line or reach EOF of the pipe.
+                return Err(Error::Read {
+                    error: "read zero length line from trace_pipe".to_string(),
+                });
+            }
+            self.tracing_subsystem.add_line(trimmed)?;
+        }
+
+        // We are here because the loop above exited normally. The traced lines are stored
+        // in `Self`. Build the `RecordsFile` by processing the lines read above.
+        self.save_intermediate_state(intermediate_file)?;
+        let rf = self.tracing_subsystem.build_records_file()?;
+        self.save_intermediate_state(intermediate_file)?;
+        Ok(rf)
+    }
+}
+
+#[cfg(test)]
+pub(crate) mod tests {
+    use crate::RecordsFile;
+
+    use std::alloc::Layout;
+    use std::borrow::ToOwned;
+    use std::convert::TryInto;
+    use std::fs::{create_dir_all, OpenOptions};
+    use std::io::Read;
+    use std::io::Seek;
+    use std::io::Write;
+    use std::ops::Range;
+    use std::os::linux::fs::MetadataExt;
+    use std::os::unix::fs::symlink;
+    use std::os::unix::prelude::OpenOptionsExt;
+    use std::path::Path;
+    use std::thread;
+    use std::time::Duration;
+    use std::{assert_eq, env};
+
+    use libc::O_DIRECT;
+    use nix::sys::stat::{major, minor};
+    use nix::unistd::pipe;
+    use rand::distributions::Alphanumeric;
+    use rand::Rng;
+    use tempfile::NamedTempFile;
+
+    use super::*;
+    use crate::replay::tests::generate_cached_files_and_record;
+    use std::ops::{Deref, DerefMut};
+
+    #[test]
+    fn trace_event_file_enable_and_restore() {
+        let mut file = NamedTempFile::new().unwrap();
+        let _ = file.write("0".as_bytes()).unwrap();
+        {
+            let _e = TraceEventFile::enable(file.path().to_owned()).unwrap();
+            assert_eq!(read_to_string(file.path()).unwrap(), "1");
+        }
+        assert_eq!(read_to_string(file.path()).unwrap(), "0");
+    }
+
+    #[test]
+    fn trace_event_file_write_and_restore() {
+        let mut file = NamedTempFile::new().unwrap();
+        let _ = file.write("hello".as_bytes()).unwrap();
+        {
+            let _e = TraceEventFile::write(file.path().to_owned(), "world").unwrap();
+            assert_eq!(read_to_string(file.path()).unwrap(), "world");
+        }
+        assert_eq!(read_to_string(file.path()).unwrap(), "hello");
+    }
+
+    fn setup_trace_mount_point(
+        create_mount_point: bool,
+        create_instances: bool,
+        instance_name: Option<String>,
+    ) -> PathBuf {
+        assert!(
+            create_mount_point || !create_instances,
+            "cannot create instances without creating mount point"
+        );
+
+        let mount_point = env::temp_dir().join(
+            rand::thread_rng()
+                .sample_iter(&Alphanumeric)
+                .take(10)
+                .map(char::from)
+                .collect::<String>(),
+        );
+
+        let mut base_path = Path::new(&mount_point).to_owned();
+        if create_mount_point {
+            create_dir(&mount_point).unwrap();
+        }
+
+        if create_instances {
+            base_path = base_path.join("instances");
+            if let Some(instance_name) = &instance_name {
+                base_path = base_path.join(instance_name)
+            }
+            create_dir_all(&base_path).unwrap();
+        }
+
+        if create_mount_point || create_instances {
+            std::fs::write(&base_path.join("buffer_size_kb"), "100").unwrap();
+            std::fs::write(&base_path.join("tracing_on"), "0").unwrap();
+            std::fs::write(&base_path.join("trace"), "0").unwrap();
+            std::fs::write(&base_path.join("trace_pipe"), "0").unwrap();
+
+            for event in [
+                "events/fs/do_sys_open",
+                "events/fs/open_exec",
+                "events/fs/uselib",
+                "events/filemap/mm_filemap_add_to_page_cache",
+            ] {
+                let event_path = base_path.join(event);
+                std::fs::create_dir_all(&event_path).unwrap();
+                std::fs::write(&event_path.join("enable"), "0").unwrap();
+            }
+        }
+        mount_point
+    }
+
+    #[test]
+    fn test_configs_no_setup() {
+        let mount_point = setup_trace_mount_point(true, true, None);
+        let _configs = TracerConfigs::new(
+            Some(10),
+            false,
+            TracerType::Mem,
+            Some(mount_point.to_str().unwrap().to_owned()),
+            None,
+        )
+        .unwrap();
+    }
+
+    #[test]
+    fn test_configs_no_setup_no_mount_point() {
+        let mount_point = setup_trace_mount_point(false, false, None);
+        assert_eq!(
+            TracerConfigs::new(
+                Some(10),
+                false,
+                TracerType::Mem,
+                Some(mount_point.to_str().unwrap().to_owned()),
+                None,
+            )
+            .unwrap_err()
+            .to_string(),
+            format!(
+                "Failed to setup prefetch: trace mount point doesn't exist: {}",
+                mount_point.to_str().unwrap()
+            )
+        );
+    }
+
+    #[test]
+    fn test_configs_no_setup_no_instances() {
+        let mount_point = setup_trace_mount_point(true, false, None);
+        assert_eq!(
+            TracerConfigs::new(
+                Some(10),
+                false,
+                TracerType::Mem,
+                Some(mount_point.to_str().unwrap().to_owned()),
+                Some("my_instance".to_owned()),
+            )
+            .unwrap_err()
+            .to_string(),
+            format!(
+                "Failed to setup prefetch: trace mount point doesn't exist: {}/instances/my_instance",
+                mount_point.to_str().unwrap()
+            )
+        );
+    }
+
+    #[test]
+    fn test_configs_setup_without_instances() {
+        let mount_point = setup_trace_mount_point(true, false, None);
+        assert!(TracerConfigs::new(
+            Some(10),
+            true,
+            TracerType::Mem,
+            Some(mount_point.to_str().unwrap().to_owned()),
+            None
+        )
+        .is_ok());
+    }
+
+    #[test]
+    fn test_configs_setup_with_instances() {
+        let mount_point = setup_trace_mount_point(true, true, Some("my_instance".to_owned()));
+        assert!(TracerConfigs::new(
+            Some(10),
+            true,
+            TracerType::Mem,
+            Some(mount_point.to_str().unwrap().to_owned()),
+            Some("my_instance".to_owned())
+        )
+        .is_ok())
+    }
+
+    pub(crate) fn setup_test_dir() -> PathBuf {
+        let test_base_dir: String = rand::thread_rng()
+            .sample_iter(&rand::distributions::Alphanumeric)
+            .take(7)
+            .map(char::from)
+            .collect();
+        let test_base_dir = format!(
+            "{}/test/{}",
+            std::fs::read_link("/proc/self/exe").unwrap().parent().unwrap().to_str().unwrap(),
+            test_base_dir
+        );
+        std::fs::create_dir_all(&test_base_dir).unwrap();
+        PathBuf::from(test_base_dir)
+    }
+
+    fn modify_records_file(rf: &RecordsFile, target: &str) -> RecordsFile {
+        let mut modified_rf = rf.clone();
+
+        for inode in modified_rf.inner.inode_map.values_mut() {
+            let new_paths: Vec<String> = inode
+                .paths
+                .iter()
+                .map(|s| {
+                    let parent = Path::new(s).parent().unwrap().to_str().unwrap();
+                    s.replace(parent, target)
+                })
+                .collect();
+
+            inode.paths = new_paths;
+        }
+
+        modified_rf
+    }
+
+    struct AlignedBuffer {
+        ptr: *mut u8,
+        len: usize,
+        layout: Layout,
+    }
+
+    impl AlignedBuffer {
+        fn new(size: usize, alignment: usize) -> Result<Self, Error> {
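+            // O_DIRECT I/O (used by `copy_uncached_files_and_record_from` below) needs
+            // buffers aligned to the page/block size, which `Vec<u8>` cannot guarantee;
+            // hence this manual `Layout`-based allocation.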
+            if size == 0 {
+                return Err(Error::Custom { error: "cannot allocate zero bytes".to_string() });
+            }
+
+            let layout = Layout::from_size_align(size, alignment).unwrap();
+            // SAFETY:
+            // - `size` is a valid non-zero positive integer representing the desired buffer size.
+            // - The layout is checked for validity using `.unwrap()`.
+            let ptr = unsafe { std::alloc::alloc(layout) };
+            if ptr.is_null() {
+                return Err(Error::Custom { error: format!("alloc failed: size: {}", size) });
+            }
+            Ok(AlignedBuffer { ptr, len: size, layout })
+        }
+    }
+
+    impl Deref for AlignedBuffer {
+        type Target = [u8];
+        // SAFETY:
+        // - self.ptr is a valid pointer obtained from a successful allocation in the new() method.
+        // - self.len is a valid length used for allocation in the new() method.
+        fn deref(&self) -> &Self::Target {
+            unsafe { std::slice::from_raw_parts(self.ptr, self.len) }
+        }
+    }
+
+    impl DerefMut for AlignedBuffer {
+        // SAFETY:
+        // - self.ptr is a valid pointer obtained from a successful allocation in the new() method.
+        // - self.len is a valid length used for allocation in the new() method.
+        fn deref_mut(&mut self) -> &mut Self::Target {
+            unsafe { std::slice::from_raw_parts_mut(self.ptr, self.len) }
+        }
+    }
+
+    impl Drop for AlignedBuffer {
+        fn drop(&mut self) {
+            // SAFETY:
+            // - self.ptr is a valid pointer obtained from a successful allocation in the new() method.
+            // - self.layout is the Layout used to allocate the memory.
+            unsafe {
+                std::alloc::dealloc(self.ptr, self.layout);
+            }
+        }
+    }
+
+    // Copies `files` into the directory pointed to by `base`.
+    //
+    // The newly created files' data is potentially uncached - i.e. the new
+    // files are opened with O_DIRECT.
+    //
+    // WARNING: Though this function makes an attempt to copy into uncached files,
+    // it cannot guarantee it, as other processes in the system may access the
+    // files. This may lead to flaky tests or unexpected results.
+    pub(crate) fn copy_uncached_files_and_record_from(
+        base: &Path,
+        files: &mut [(NamedTempFile, Vec<Range<u64>>)],
+        rf: &RecordsFile,
+    ) -> (RecordsFile, Vec<(PathBuf, Vec<Range<u64>>)>) {
+        let mut new_files = vec![];
+        for (in_file, ranges) in files {
+            let out_path = base.join(in_file.path().file_name().unwrap());
+            let mut out_file = OpenOptions::new()
+                .read(true)
+                .write(true)
+                .custom_flags(O_DIRECT)
+                .create_new(true)
+                .open(&out_path)
+                .expect("Can't open");
+            let page_size = page_size().unwrap() as u64;
+            let in_file_size = in_file.metadata().unwrap().len();
+            assert_eq!(
+                in_file_size % page_size,
+                0,
+                "we create files that are aligned to page size"
+            );
+            let out_file_size = in_file_size;
+            let mut buf =
+                AlignedBuffer::new(out_file_size.try_into().unwrap(), page_size as usize).unwrap();
+            let _ = in_file.read(&mut *buf).unwrap();
+            out_file.write_all(&*buf).unwrap();
+
+            new_files.push((out_path, ranges.clone()));
+        }
+
+        for inode in rf.inner.inode_map.values() {
+            for path in &inode.paths {
+                let in_path = Path::new(&path);
+                let out_path = base.join(in_path.file_name().unwrap());
+                if !out_path.exists() {
+                    let orig_file =
+                        out_path.file_name().unwrap().to_str().unwrap().replace("-symlink", "");
+                    symlink(orig_file, out_path.to_str().unwrap()).unwrap();
+                    new_files.push((out_path.to_owned(), vec![]));
+                }
+            }
+        }
+        let modified_rf = modify_records_file(rf, base.to_str().unwrap());
+        (modified_rf, new_files)
+    }
+
+    // Generates a mem trace string from the given args. Sometimes injects lines that are
+    // of no importance.
+    fn mem_generate_trace_line_for_open(path: &Path, time: u16, _op: Option<&str>) -> Vec<String> {
+        let op = "mm_filemap_add_to_page_cache";
+        let stat = path.metadata().unwrap();
+        let major_no = major(stat.st_dev());
+        let minor_no = minor(stat.st_dev());
+        let inode_number = stat.st_ino();
+
+        vec![
+            // unknown operation
+            format!(
+                " SettingsProvide-502 [001] .... {}.{}: {}: dev {}:{} ino {:x} \
+                 page=000000008b759458 pfn=59827 ofs=0",
+                time,
+                (time * 100) + time,
+                "unknown_operation",
+                major_no,
+                minor_no,
+                inode_number,
+            ),
+            // invalid/relative inode
+            format!(
+                " SettingsProvide-502 [001] .... {}.{}: {}: dev {}:{} ino {:x} \
+                 page=000000008b759458 pfn=59827 ofs=0",
+                time,
+                (time * 100) + time,
+                "unknown_operation",
+                major_no,
+                minor_no,
+                inode_number + 100,
+            ),
+            // good one
+            format!(
+                " BOX_ENTRY_ADDED-3071 [001] .... {}.{}: {}: dev {}:{} ino {:x} \
+                 page=00000000f936540b pfn=60952 ofs={}",
+                time,
+                (time * 100) + time,
+                op,
+                major_no,
+                minor_no,
+                inode_number,
+                0
+            ),
+            // good one
+            format!(
+                " BOX_ENTRY_ADDED-3071 [001] .... {}.{}: {}: dev {}:{} ino {:x} \
+                 page=00000000f936540b pfn=60952 ofs={}",
+                time,
+                (time * 100) + time,
+                op,
+                major_no,
+                minor_no,
+                inode_number,
+                10_000,
+            ),
+            // good one
+            format!(
+                " BOX_ENTRY_ADDED-3071 [001] .... {}.{}: {}: dev {}:{} ino {:x} \
+                 page=00000000f936540b pfn=60952 ofs={}",
+                time,
+                (time * 100) + time,
+                op,
+                major_no,
+                minor_no,
+                inode_number,
+                100_000,
+            ),
+            // good one
+            format!(
+                " BOX_ENTRY_ADDED-3071 [001] .... {}.{}: {}: dev {}:{} ino {:x} \
+                 page=00000000f936540b pfn=60952 ofs={}",
+                time,
+                (time * 100) + time,
+                op,
+                major_no,
+                minor_no,
+                inode_number,
+                1_000_000,
+            ),
+            // invalid operation case
+            format!(
+                " SettingsProvide-502 [001] .... {}.{}: {}: dev {}:{} ino {:x} \
+                 page=000000008b759458 pfn=59827 ofs=0",
+                time,
+                (time * 100) + time,
+                op.to_uppercase(),
+                major_no,
+                minor_no,
+                inode_number,
+            ),
+        ]
+    }
+
+    fn generate_trace_line_for_open(
+        tracing_type: TracerType,
+        path: &Path,
+        time: u16,
+        op: Option<&str>,
+    ) -> Vec<String> {
+        match tracing_type {
+            TracerType::Mem => mem_generate_trace_line_for_open(path, time, op),
+        }
+    }
+
+    // Generates a fake mountinfo file with a bunch of fake mount points and
+    // fakes the given path as a mount point.
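+    // Per proc(5), each mountinfo line has the form:
+    //   <mount id> <parent id> <major>:<minor> <root> <mount point> <options> ... - <fstype> <source> <super options>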
+    fn create_fake_mountinfo_for(path: &Path) -> NamedTempFile {
+        let stat = path.metadata().unwrap();
+        let major_no = major(stat.st_dev());
+        let minor_no = minor(stat.st_dev());
+        let mut mountinfo_path = NamedTempFile::new().unwrap();
+        mountinfo_path
+            .write_all(
+                "16 15 0:17 / /dev/pts rw,relatime shared:3 - devpts devpts \
+                 rw,seclabel,mode=600,ptmxmode=000\n"
+                    .as_bytes(),
+            )
+            .unwrap();
+        mountinfo_path
+            .write_all(
+                "17 26 0:18 / /proc rw,relatime shared:4 - proc proc rw,gid=3009,hidepid=\
+                 invisible\n"
+                    .as_bytes(),
+            )
+            .unwrap();
+        mountinfo_path
+            .write_all(
+                format!(
+                    "26 24 {}:{} / {} ro,nodev,noatime shared:1 - ext4 /dev/block/dm-3 ro,\
+                     seclabel,errors=panic\n",
+                    major_no,
+                    minor_no,
+                    path.to_str().unwrap(),
+                )
+                .as_bytes(),
+            )
+            .unwrap();
+
+        mountinfo_path
+    }
+
+    static RECORD_PER_FILE: usize = 4;
+
+    fn create_tracer(
+        base_dir: &Path,
+        t: TracerType,
+    ) -> (Box<dyn TraceSubsystem + Send>, Vec<NamedTempFile>) {
+        let kb_buffer_size = Some(8388608);
+        let trace_mount_point = setup_test_dir();
+        let mut buffer_size_file = NamedTempFile::new_in(&trace_mount_point).unwrap();
+        buffer_size_file
+            .write_all(format!("{}", kb_buffer_size.as_ref().unwrap()).as_bytes())
+            .unwrap();
+
+        let buffer_size_file_path = buffer_size_file.path().to_str().unwrap().to_string();
+        let mut config = TracerConfigs::new(
+            kb_buffer_size,
+            false,
+            t.clone(),
+            Some(trace_mount_point.to_str().unwrap().to_string()),
+            None,
+        )
+        .unwrap();
+        let mut tempfiles = vec![buffer_size_file];
+        (
+            match t {
+                TracerType::Mem => {
+                    let mountinfo_path =
+                        create_fake_mountinfo_for(&base_dir.canonicalize().unwrap());
+                    config.trace_events = vec![];
+                    config.buffer_size_file_path = buffer_size_file_path;
+                    config.mountinfo_path =
+                        Some(mountinfo_path.path().to_str().unwrap().to_string());
+                    tempfiles.push(mountinfo_path);
+                    Box::new(MemTraceSubsystem::create_with_configs(config).unwrap())
+                }
+            },
+            tempfiles,
+        )
+    }
+
+    fn test_trace_of_type(tracing_type: TracerType) {
+        let test_base_dir = setup_test_dir();
+        let (_rf, files) = generate_cached_files_and_record(
+            Some(&test_base_dir),
+            true,
+            Some(page_size().unwrap() as u64),
+        );
+
+        let mut file = NamedTempFile::new().unwrap();
+        let (reader_fd, writer_fd) = pipe().unwrap();
+        let reader = File::from(reader_fd);
+        let mut writer = File::from(writer_fd);
+
+        let (tracer, _temp_files) = create_tracer(&test_base_dir, tracing_type.clone());
+
+        let mut files_iter = files.iter();
+
+        for line in generate_trace_line_for_open(
+            tracing_type.clone(),
+            files_iter.next().unwrap().0.path(),
+            5,
+            None,
+        ) {
+            writeln!(file, "{}", line).unwrap();
+        }
+        file.sync_all().unwrap();
+        file.seek(std::io::SeekFrom::Start(0)).unwrap();
+
+        let (mut tracer, exit_evt) =
+            Tracer::create_with_config(file.reopen().unwrap(), reader, tracer).unwrap();
+
+        let thd = thread::spawn(move || tracer.trace(None));
+
+        for (index, file) in files_iter.enumerate() {
+            for line in generate_trace_line_for_open(tracing_type.clone(), file.0.path(), 10, None)
+            {
+                writeln!(&mut writer, "{}", line).unwrap();
+            }
+            if index == 0 {
+                // This sleep emulates delays in data arriving over a pipe. This shouldn't
+                // cause flakes in virtualized environments.
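+                // It also lets the tracer thread drain everything written so far before
+                // the remaining lines arrive.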
+ thread::sleep(Duration::from_secs(1)); + } + } + + thread::sleep(Duration::from_millis(100)); + exit_evt.send(()).unwrap(); + writeln!(&mut writer, "line").unwrap(); + + let tracer_rf = thd.join().unwrap().unwrap(); + + let mut found_count = 0; + for file in &files { + let mut found = false; + 'inner: for inode in tracer_rf.inner.inode_map.values() { + for found_path in &inode.paths { + if found_path == file.0.path().canonicalize().unwrap().to_str().unwrap() { + found = true; + break 'inner; + } + } + } + if found { + found_count += 1; + } else { + println!("missing {:?}", file.0.path()); + } + } + assert_eq!(found_count, files.len()); + assert_eq!(tracer_rf.inner.records.len(), files.len() * RECORD_PER_FILE); + } + + #[test] + fn test_trace_mem() { + test_trace_of_type(TracerType::Mem) + } +}