Merge "libprefetch: library to prefetch data using tracing." into main am: e367550792

Original change: https://android-review.googlesource.com/c/platform/system/core/+/3364432

Change-Id: I851a571a137a52c9ad9f8b6ef42d377e09281b61
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
Akilesh Kailash 2024-11-20 05:26:09 +00:00 committed by Automerger Merge Worker
commit 7ca6515361
14 changed files with 5076 additions and 0 deletions

init/libprefetch/prefetch/Android.bp Normal file

@@ -0,0 +1,80 @@
//
// Copyright (C) 2024 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package {
default_team: "trendy_team_android_kernel",
default_applicable_licenses: ["Android-Apache-2.0"],
}
rust_library_rlib {
name: "libprefetch_rs",
crate_name: "prefetch_rs",
srcs: ["src/lib.rs"],
rustlibs: [
"libandroid_logger",
"libargh",
"libchrono",
"libcrc32fast",
"libcsv",
"liblibc",
"liblog_rust",
"liblru_cache",
"libnix",
"librand",
"librayon",
"libregex",
"libserde_cbor",
"libserde_json",
"libserde",
"libthiserror",
"libwalkdir",
"librustutils",
],
prefer_rlib: true,
features: [
"derive",
"error-context",
"help",
"std",
"usage",
"use_argh",
],
}
rust_binary {
name: "prefetch",
crate_name: "prefetch",
srcs: ["src/main.rs"],
rustlibs: [
"libprefetch_rs",
"liblog_rust",
"libandroid_logger",
],
prefer_rlib: true,
features: [
"default",
"derive",
"error-context",
"help",
"std",
"usage",
"use_argh",
],
init_rc: [
"prefetch.rc",
],
}
// TODO: Add rust_test to enable unit testing - b/378554334

init/libprefetch/prefetch/Cargo.lock generated Normal file

@@ -0,0 +1,743 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "aho-corasick"
version = "0.7.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5"
dependencies = [
"memchr",
]
[[package]]
name = "android_log-sys"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85965b6739a430150bdd138e2374a98af0c3ee0d030b3bb7fc3bddff58d0102e"
[[package]]
name = "android_logger"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9ed09b18365ed295d722d0b5ed59c01b79a826ff2d2a8f73d5ecca8e6fb2f66"
dependencies = [
"android_log-sys",
"env_logger",
"lazy_static",
"log",
]
[[package]]
name = "argh"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab257697eb9496bf75526f0217b5ed64636a9cfafa78b8365c71bd283fcef93e"
dependencies = [
"argh_derive",
"argh_shared",
]
[[package]]
name = "argh_derive"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b382dbd3288e053331f03399e1db106c9fb0d8562ad62cb04859ae926f324fa6"
dependencies = [
"argh_shared",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "argh_shared"
version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5693f39141bda5760ecc4111ab08da40565d1771038c4a0250f03457ec707531"
dependencies = [
"serde",
]
[[package]]
name = "atty"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
dependencies = [
"hermit-abi 0.1.19",
"libc",
"winapi 0.3.9",
]
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "bincode"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b92615d57e4048e480bd7e3c2d7f6ec252819fffec95efbc30ec7c68744aa66c"
dependencies = [
"byteorder",
"serde",
]
[[package]]
name = "bitflags"
version = "2.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
[[package]]
name = "bstr"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a40b47ad93e1a5404e6c18dec46b628214fee441c70f4ab5d6942142cc268a3d"
dependencies = [
"lazy_static",
"memchr",
"regex-automata",
"serde",
]
[[package]]
name = "byteorder"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "cfg_aliases"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e"
[[package]]
name = "chrono"
version = "0.4.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73"
dependencies = [
"libc",
"num-integer",
"num-traits",
"serde",
"time",
"winapi 0.3.9",
]
[[package]]
name = "crc32fast"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d"
dependencies = [
"cfg-if",
]
[[package]]
name = "crossbeam-channel"
version = "0.5.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "176dc175b78f56c0f321911d9c8eb2b77a78a4860b9c19db83835fea1a46649b"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d"
dependencies = [
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345"
[[package]]
name = "csv"
version = "1.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1"
dependencies = [
"bstr",
"csv-core",
"itoa",
"ryu",
"serde",
]
[[package]]
name = "csv-core"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70"
dependencies = [
"memchr",
]
[[package]]
name = "either"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
[[package]]
name = "env_logger"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3"
dependencies = [
"atty",
"humantime",
"log",
"regex",
"termcolor",
]
[[package]]
name = "fuchsia-cprng"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba"
[[package]]
name = "getrandom"
version = "0.2.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5"
dependencies = [
"cfg-if",
"libc",
"wasi 0.11.0+wasi-snapshot-preview1",
]
[[package]]
name = "half"
version = "1.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7"
[[package]]
name = "hermit-abi"
version = "0.1.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
dependencies = [
"libc",
]
[[package]]
name = "hermit-abi"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d3d0e0f38255e7fa3cf31335b3a56f05febd18025f4db5ef7a0cfb4f8da651f"
[[package]]
name = "humantime"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
[[package]]
name = "itoa"
version = "0.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4"
[[package]]
name = "kernel32-sys"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
dependencies = [
"winapi 0.2.8",
"winapi-build",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "libc"
version = "0.2.162"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "18d287de67fe55fd7e1581fe933d965a5a9477b38e949cfa9f8574ef01506398"
[[package]]
name = "linked-hash-map"
version = "0.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f"
[[package]]
name = "log"
version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710"
dependencies = [
"cfg-if",
]
[[package]]
name = "lru-cache"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "31e24f1ad8321ca0e8a1e0ac13f23cb668e6f5466c2c57319f6a5cf1cc8e3b1c"
dependencies = [
"linked-hash-map",
]
[[package]]
name = "memchr"
version = "2.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525"
[[package]]
name = "nix"
version = "0.28.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4"
dependencies = [
"bitflags",
"cfg-if",
"cfg_aliases",
"libc",
]
[[package]]
name = "num-integer"
version = "0.1.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9"
dependencies = [
"autocfg",
"num-traits",
]
[[package]]
name = "num-traits"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c"
dependencies = [
"autocfg",
]
[[package]]
name = "num_cpus"
version = "1.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
dependencies = [
"hermit-abi 0.3.4",
"libc",
]
[[package]]
name = "ppv-lite86"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
[[package]]
name = "prefetch"
version = "0.1.0"
dependencies = [
"android_logger",
"argh",
"bincode",
"chrono",
"crc32fast",
"csv",
"env_logger",
"libc",
"log",
"lru-cache",
"memchr",
"nix",
"proc-macro2",
"quote",
"rand 0.8.5",
"rayon",
"rayon-core",
"regex",
"serde",
"serde_cbor",
"serde_derive",
"serde_json",
"tempfile",
"thiserror",
"thiserror-impl",
"walkdir",
]
[[package]]
name = "proc-macro2"
version = "1.0.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a152013215dca273577e18d2bf00fa862b89b24169fb78c4c95aeb07992c9cec"
dependencies = [
"unicode-xid",
]
[[package]]
name = "quote"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rand"
version = "0.3.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64ac302d8f83c0c1974bf758f6b041c6c8ada916fbb44a609158ca8b064cc76c"
dependencies = [
"libc",
"rand 0.4.6",
]
[[package]]
name = "rand"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293"
dependencies = [
"fuchsia-cprng",
"libc",
"rand_core 0.3.1",
"rdrand",
"winapi 0.3.9",
]
[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"libc",
"rand_chacha",
"rand_core 0.6.4",
]
[[package]]
name = "rand_chacha"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core 0.6.4",
]
[[package]]
name = "rand_core"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b"
dependencies = [
"rand_core 0.4.2",
]
[[package]]
name = "rand_core"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc"
[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [
"getrandom",
]
[[package]]
name = "rayon"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b0d8e0819fadc20c74ea8373106ead0600e3a67ef1fe8da56e39b9ae7275674"
dependencies = [
"autocfg",
"crossbeam-deque",
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ab346ac5921dc62ffa9f89b7a773907511cdfa5490c572ae9be1be33e8afa4a"
dependencies = [
"crossbeam-channel",
"crossbeam-deque",
"crossbeam-utils",
"lazy_static",
"num_cpus",
]
[[package]]
name = "rdrand"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2"
dependencies = [
"rand_core 0.3.1",
]
[[package]]
name = "redox_syscall"
version = "0.1.57"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce"
[[package]]
name = "regex"
version = "1.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a26af418b574bd56588335b3a3659a65725d4e636eb1016c2f9e3b38c7cc759"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
[[package]]
name = "regex-syntax"
version = "0.6.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
[[package]]
name = "ryu"
version = "1.0.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c"
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]]
name = "serde"
version = "1.0.123"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92d5161132722baa40d802cc70b15262b98258453e85e5d1d365c757c73869ae"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_cbor"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5"
dependencies = [
"half",
"serde",
]
[[package]]
name = "serde_derive"
version = "1.0.123"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9391c295d64fc0abb2c556bad848f33cb8296276b1ad2677d1ae1ace4f258f31"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.62"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea1c6153794552ea7cf7cf63b1231a25de00ec90db326ba6264440fa08e31486"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]]
name = "syn"
version = "1.0.80"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d010a1623fbd906d51d650a9916aaefc05ffa0e4053ff7fe601167f3e715d194"
dependencies = [
"proc-macro2",
"quote",
"unicode-xid",
]
[[package]]
name = "tempfile"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "11ce2fe9db64b842314052e2421ac61a73ce41b898dc8e3750398b219c5fc1e0"
dependencies = [
"kernel32-sys",
"libc",
"rand 0.3.23",
"redox_syscall",
"winapi 0.2.8",
]
[[package]]
name = "termcolor"
version = "1.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755"
dependencies = [
"winapi-util",
]
[[package]]
name = "thiserror"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0f4a65597094d4483ddaed134f409b2cb7c1beccf25201a9f73c719254fa98e"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7765189610d8241a44529806d6fd1f2e0a08734313a35d5b3a556f92b381f3c0"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "time"
version = "0.1.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a"
dependencies = [
"libc",
"wasi 0.10.0+wasi-snapshot-preview1",
"winapi 0.3.9",
]
[[package]]
name = "unicode-xid"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c"
[[package]]
name = "walkdir"
version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee"
dependencies = [
"same-file",
"winapi-util",
]
[[package]]
name = "wasi"
version = "0.10.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "winapi"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-build"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc"
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-util"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596"
dependencies = [
"winapi 0.3.9",
]
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

init/libprefetch/prefetch/Cargo.toml Normal file

@@ -0,0 +1,51 @@
[package]
name = "prefetch"
version = "0.1.0"
edition = "2018"
default-run = "prefetch"
[lib]
name = "prefetch_rs"
path = "src/lib.rs"
[[bin]]
name = "prefetch"
path = "src/main.rs"
[features]
default = ["use_argh"]
use_argh = ["argh"]
[dependencies]
argh = { version = "0.1.10", optional = true }
chrono = { version = "=0.4.19", features = ["serde"] }
crc32fast = "1.2.1"
csv = "=1.1.6"
libc = "0.2.82"
log = "=0.4.14"
lru-cache = "0.1.2"
memchr = "=2.3.4"
nix = {version = "0.28", features = ["fs", "time", "feature", "mman", "uio"]}
proc-macro2 = "=1.0.26"
quote = "=1.0.9"
rand = "0.8.3"
rayon = "=1.5.0"
rayon-core = "=1.9.0"
regex = "1.4.5"
serde = { version = "*", features = ["derive"] }
serde_cbor = "0.11.2"
serde_derive = "=1.0.123"
serde_json = "=1.0.62"
thiserror = "=1.0.24"
thiserror-impl = "1.0.24"
walkdir = "2.3.2"
# crates required for android builds
[target.'cfg(target_os = "android")'.dependencies]
android_logger = "0.10.1"
# crates not present in android builds
[target.'cfg(not(target_os = "android"))'.dependencies]
bincode = "=0.9.0"
env_logger = "=0.8.4"
tempfile = "2.2.0"

init/libprefetch/prefetch/OWNERS Normal file

@@ -0,0 +1,3 @@
akailash@google.com
auradkar@google.com
takayas@google.com

init/libprefetch/prefetch/prefetch.rc Normal file

@@ -0,0 +1,13 @@
service prefetch_record /system/bin/prefetch record --duration ${ro.prefetch_boot.duration_s:-0}
class main
user root
group root system
disabled
oneshot
service prefetch_replay /system/bin/prefetch replay --io-depth ${ro.prefetch_boot.io_depth:-2} --max-fds ${ro.prefetch_boot.max_fds:-128}
class main
user root
group root system
disabled
oneshot

init/libprefetch/prefetch/src/args.rs Normal file

@@ -0,0 +1,108 @@
// Copyright (C) 2024 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
pub(crate) static DEFAULT_IO_DEPTH: u16 = 2;
pub(crate) static DEFAULT_MAX_FDS: u16 = 128;
pub(crate) static DEFAULT_EXIT_ON_ERROR: bool = false;
mod args_argh;
use args_argh as args_internal;
use std::path::Path;
use std::path::PathBuf;
use std::process::exit;
pub use args_internal::OutputFormat;
pub use args_internal::ReplayArgs;
pub use args_internal::TracerType;
pub use args_internal::{DumpArgs, MainArgs, RecordArgs, SubCommands};
use serde::Deserialize;
use serde::Serialize;
use crate::Error;
use log::error;
// Deserialized form of the config file
#[derive(Clone, Debug, Default, Deserialize, Serialize, Eq, PartialEq)]
pub struct ConfigFile {
// Files to be excluded from prefetch. These files might have been
// added to the record file while recording, but we do not want to
// replay them. These can be two types of files:
// 1) installation-specific files (e.g. files in /data) and
// 2) large files which we do not want to load in replay (e.g. APK files).
pub files_to_exclude_regex: Vec<String>,
// Files that are not in the record file, but need to be loaded during replay
pub additional_replay_files: Vec<String>,
}
fn verify_and_fix(args: &mut MainArgs) -> Result<(), Error> {
match &mut args.nested {
SubCommands::Record(arg) => {
if arg.debug && arg.int_path.is_none() {
arg.int_path = Some(PathBuf::from(format!("{}.int", arg.path.to_str().unwrap())));
}
if let Some(p) = &arg.int_path {
ensure_path_doesnt_exist(p)?;
}
}
SubCommands::Replay(arg) => {
ensure_path_exists(&arg.path)?;
if !arg.config_path.as_os_str().is_empty() {
ensure_path_exists(&arg.config_path)?;
}
}
SubCommands::Dump(arg) => {
ensure_path_exists(&arg.path)?;
}
}
Ok(())
}
/// Returns an error if the given path `p` exists.
pub(crate) fn ensure_path_doesnt_exist(p: &Path) -> Result<(), Error> {
if p.exists() {
Err(Error::InvalidArgs {
arg_name: "path".to_string(),
arg_value: p.display().to_string(),
error: "Path already exists".to_string(),
})
} else {
Ok(())
}
}
/// Returns an error if the given path `p` doesn't exist or is not a regular file.
pub(crate) fn ensure_path_exists(p: &Path) -> Result<(), Error> {
if p.is_file() {
Ok(())
} else {
Err(Error::InvalidArgs {
arg_name: "path".to_string(),
arg_value: p.display().to_string(),
error: "Path does not exist".to_string(),
})
}
}
/// Builds `MainArgs` from command line arguments. On error, prints an error/help
/// message and exits.
pub fn args_from_env() -> MainArgs {
let mut args = args_internal::args_from_env();
if let Err(e) = verify_and_fix(&mut args) {
error!("failed to verify args: {}", e);
exit(1);
}
args
}
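
The crate's src/main.rs is not included in this excerpt, so the following is only a minimal sketch of how a caller might drive args_from_env() and the exported record() entry point; the exact dispatch in the real binary is an assumption here.

// Hypothetical caller; the actual src/main.rs is not shown in this diff.
use log::error;
use prefetch_rs::{args_from_env, record, MainArgs, SubCommands};

fn main() {
    // args_from_env() runs verify_and_fix() (see above) and exits with an
    // error message if the arguments are inconsistent.
    let args: MainArgs = args_from_env();
    match &args.nested {
        SubCommands::Record(record_args) => {
            if let Err(e) = record(record_args) {
                error!("record failed: {}", e);
            }
        }
        // The Replay and Dump entry points are not visible in this excerpt.
        SubCommands::Replay(_) | SubCommands::Dump(_) => {}
    }
}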

init/libprefetch/prefetch/src/args_argh.rs Normal file

@@ -0,0 +1,217 @@
// Copyright (C) 2024 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::{option::Option, path::PathBuf, result::Result::Ok, str::FromStr};
use argh::FromArgs;
use serde::Deserialize;
use crate::args::DEFAULT_EXIT_ON_ERROR;
use crate::args::DEFAULT_IO_DEPTH;
use crate::args::DEFAULT_MAX_FDS;
use crate::Error;
/// prefetch-rs
#[derive(Eq, PartialEq, Debug, Default, FromArgs)]
pub struct MainArgs {
/// subcommands
#[argh(subcommand)]
pub nested: SubCommands,
}
/// Sub commands for prefetch functions
#[derive(Eq, PartialEq, Debug, FromArgs)]
#[argh(subcommand)]
pub enum SubCommands {
/// Records prefetch data.
Record(RecordArgs),
/// Replays from prefetch data
Replay(ReplayArgs),
/// Dump prefetch data in human readable format
Dump(DumpArgs),
}
impl Default for SubCommands {
fn default() -> Self {
Self::Dump(DumpArgs::default())
}
}
fn default_path() -> PathBuf {
PathBuf::from("/metadata/prefetch/prefetch.pack")
}
fn parse_tracing_instance(value: &str) -> Result<Option<String>, String> {
Ok(Some(value.to_string()))
}
#[derive(Eq, PartialEq, Debug, Default, FromArgs)]
/// Records prefetch data.
#[argh(subcommand, name = "record")]
pub struct RecordArgs {
/// duration in seconds to record the data
///
/// On Android, if the duration is set to zero, recording
/// continues until the property sys.boot_completed is set to 1.
#[argh(option)]
pub duration: u16,
/// file path where the records will be written to
///
/// A new file is created at the given path. If the path exists, it
/// will be overwritten.
#[argh(option, default = "default_path()")]
pub path: PathBuf,
/// when set, an intermediate file is created that provides more information
/// about the collected data.
#[argh(option, default = "false")]
pub debug: bool,
/// file path where the intermediate file will be written to
///
/// A new file is created at the given path. Errors out if the file
/// already exists.
#[argh(option)]
pub int_path: Option<PathBuf>,
/// size of the trace buffer which holds trace events. A larger buffer is
/// needed on a system that has faster disks or a large number of events
/// enabled. Defaults to TRACE_BUFFER_SIZE_KIB KiB.
#[argh(option, long = "trace-buffer-size")]
pub trace_buffer_size_kib: Option<u64>,
/// trace subsystem to use. The "mem" subsystem is used by default.
#[argh(option, default = "Default::default()")]
pub tracing_subsystem: TracerType,
/// if true, enables all the needed trace events and, at the end, restores
/// their original values.
/// If false, assumes that the user has set up the needed trace events.
#[argh(option, default = "true")]
pub setup_tracing: bool,
/// if specified, works on a tracing instance (like /sys/kernel/tracing/instances/my_instance)
/// rather than on the shared global instance (i.e. /sys/kernel/tracing).
#[argh(
option,
default = "Some(\"prefetch\".to_string())",
from_str_fn(parse_tracing_instance)
)]
pub tracing_instance: Option<String>,
}
/// Type of tracing subsystem to use.
#[derive(Deserialize, Clone, Eq, PartialEq, Debug)]
pub enum TracerType {
/// The mem tracing subsystem records when a file's in-memory page gets added to the fs cache.
Mem,
}
impl FromStr for TracerType {
type Err = Error;
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
Ok(match s.to_lowercase().as_str() {
"mem" => Self::Mem,
_ => {
return Err(Error::InvalidArgs {
arg_name: "tracing_subsystem".to_owned(),
arg_value: s.to_owned(),
error: "unknown value".to_owned(),
})
}
})
}
}
impl Default for TracerType {
fn default() -> Self {
Self::Mem
}
}
#[derive(Eq, PartialEq, Debug, Default, FromArgs)]
/// Prefetch data from the recorded file.
#[argh(subcommand, name = "replay")]
pub struct ReplayArgs {
/// file path from where the records will be read
#[argh(option, default = "default_path()")]
pub path: PathBuf,
/// IO depth. Number of IOs that can go in parallel.
#[argh(option, long = "io-depth", default = "DEFAULT_IO_DEPTH")]
pub io_depth: u16,
/// max number of open fds to cache
#[argh(option, arg_name = "max-fds", default = "DEFAULT_MAX_FDS")]
pub max_fds: u16,
/// if true, command exits on encountering any error.
///
/// This defaults to false as there is no harm in prefetching if we encounter
/// non-fatal errors.
#[argh(option, default = "DEFAULT_EXIT_ON_ERROR")]
pub exit_on_error: bool,
/// file path from where the prefetch config file will be read
#[argh(option, default = "PathBuf::new()")]
pub config_path: PathBuf,
}
/// dump records file in given format
#[derive(Eq, PartialEq, Debug, Default, FromArgs)]
#[argh(subcommand, name = "dump")]
pub struct DumpArgs {
/// file path from where the records will be read
#[argh(option)]
pub path: PathBuf,
/// output format. One of json or csv.
/// Note: In csv format, a few fields are excluded from the output.
#[argh(option)]
pub format: OutputFormat,
}
#[derive(Deserialize, Eq, PartialEq, Debug)]
pub enum OutputFormat {
Json,
Csv,
}
impl FromStr for OutputFormat {
type Err = Error;
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
Ok(match s.to_lowercase().as_str() {
"csv" => Self::Csv,
"json" => Self::Json,
_ => {
return Err(Error::InvalidArgs {
arg_name: "format".to_owned(),
arg_value: s.to_owned(),
error: "unknown value".to_owned(),
})
}
})
}
}
impl Default for OutputFormat {
fn default() -> Self {
Self::Json
}
}
/// Build args struct from command line arguments
pub fn args_from_env() -> MainArgs {
argh::from_env()
}
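
As a small illustration of the FromStr impls above (a sketch, not part of the change): parsing is case-insensitive, and unknown values surface as Error::InvalidArgs.

// Sketch exercising the FromStr impls above; not part of the change.
use std::str::FromStr;

fn parse_examples() {
    assert_eq!(TracerType::from_str("MEM").unwrap(), TracerType::Mem);
    assert_eq!(OutputFormat::from_str("Json").unwrap(), OutputFormat::Json);
    // Unknown values are rejected with Error::InvalidArgs.
    assert!(OutputFormat::from_str("yaml").is_err());
}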

init/libprefetch/prefetch/src/error.rs Normal file

@@ -0,0 +1,187 @@
// Copyright (C) 2024 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use thiserror::Error;
use crate::{format::FileId, InodeInfo};
/// Enumerates all possible errors returned by this library.
#[derive(Debug, Error)]
pub enum Error {
/// Represents a failure to open a file.
#[error("Open error: {path}: {source}")]
Open {
/// The IO error
source: std::io::Error,
/// Path on which the operation failed.
path: String,
},
/// Represents a failure to create a file.
#[error("Create error. {path} {source}")]
Create {
/// The IO error
source: std::io::Error,
/// Path on which the operation failed.
path: String,
},
/// Represents a failure to read trace file.
#[error("Read error. {error}")]
Read {
/// Detailed error message.
error: String,
},
/// Represents a failure to write to a file.
#[error("Write error. {source}")]
Write {
/// The IO error
source: std::io::Error,
/// file path
path: String,
},
/// Represents a failure to delete a file.
#[error("Delete error. {path} {source}")]
Delete {
/// The IO error
source: std::io::Error,
/// Path on which the operation failed.
path: String,
},
/// Represents a failure to stat a file.
#[error("Stat error. {path} {source}")]
Stat {
/// The IO error
source: std::io::Error,
/// Path on which the operation failed.
path: String,
},
/// Represents a failure to clone a file.
#[error("clone failed. {id} {source}")]
FileClone {
/// The IO error
source: std::io::Error,
/// File id for which we could not clone the file.
id: FileId,
},
/// Represents a failure to mmap a file.
#[error("mmap failed. {path} {error}")]
Mmap {
/// Detailed error message.
error: String,
/// Path on which the operation failed.
path: String,
},
/// Represents a failure to munmap a file.
#[error("munmap failed. {length} {error}")]
Munmap {
/// Detailed error message.
error: String,
/// Length of the mapping for which munmap failed.
length: usize,
},
/// Represents all other cases of `std::io::Error`.
///
#[error(transparent)]
IoError(
/// The IO error
#[from]
std::io::Error,
),
/// Represents a failure to map FileId to path
///
#[error("Failed to map id to path: {id}")]
IdNoFound {
/// File id for which path lookup failed.
id: FileId,
},
/// Indicates that the file is skipped for prefetching
/// because it is in the exclude files list.
///
#[error("Skipped prefetching file from path: {path}")]
SkipPrefetch {
/// Path to file for which prefetching is skipped.
path: String,
},
/// Represents spurious InodeInfo or missing Record.
///
#[error(
"Stale inode(s) info found.\n\
missing_file_ids: {missing_file_ids:#?}\n\
stale_inodes: {stale_inodes:#?} \n\
missing_paths:{missing_paths:#?}"
)]
StaleInode {
/// FileIds for which InodeInfo is missing.
missing_file_ids: Vec<FileId>,
/// InodeInfos for which no records exist.
stale_inodes: Vec<InodeInfo>,
/// InodeInfos in which no paths were found.
missing_paths: Vec<InodeInfo>,
},
/// Represents a failure to serialize records file.
#[error("Serialize error: {error}")]
Serialize {
/// Detailed error message.
error: String,
},
/// Represents a failure to deserialize records file.
#[error("Deserialize error: {error}")]
Deserialize {
/// Detailed error message.
error: String,
},
/// Represents a failure from thread pool.
#[error("Thread pool error: {error}")]
ThreadPool {
/// Detailed error message.
error: String,
},
/// Represents a failure to setup file.
#[error("Failed to setup prefetch: {error}")]
Custom {
/// Detailed error message.
error: String,
},
/// Represents a failure to parse args.
#[error("Failed to parse arg:{arg_name} value:{arg_value} error:{error}")]
InvalidArgs {
/// Arg name.
arg_name: String,
/// Arg value.
arg_value: String,
/// Detailed error message.
error: String,
},
}
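
For illustration only (a sketch under the definitions above, not code from this change): callers attach context by constructing the structured variants, while a bare std::io::Error converts through the transparent IoError variant via `?`.

// Sketch: two ways an io::Error can become a prefetch Error.
use std::fs::File;

fn open_with_context(path: &str) -> Result<File, Error> {
    // Structured variant: keeps the failing path alongside the source error.
    File::open(path).map_err(|source| Error::Open { source, path: path.to_owned() })
}

fn open_without_context(path: &str) -> Result<File, Error> {
    // `?` converts std::io::Error into Error::IoError via #[from].
    Ok(File::open(path)?)
}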

init/libprefetch/prefetch/src/format.rs Normal file

@@ -0,0 +1,823 @@
// Copyright (C) 2024 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::cmp::{max, min};
use std::collections::{BTreeMap, HashMap, HashSet};
use std::fmt;
use std::fmt::Display;
use std::fs::{File, Metadata, OpenOptions};
use std::hash::Hash;
use std::io::Write;
use std::ops::{Deref, DerefMut};
use std::os::unix::fs::MetadataExt;
use std::time::SystemTime;
use crc32fast::Hasher;
use log::debug;
use regex::Regex;
use serde::Deserializer;
use serde::Serialize;
use serde::{Deserialize, Serializer};
use crate::error::Error;
static MAGIC_UUID: [u8; 16] = [
0x10, 0x54, 0x3c, 0xb8, 0x60, 0xdb, 0x49, 0x45, 0xa1, 0xd5, 0xde, 0xa7, 0xd2, 0x3b, 0x05, 0x49,
];
static MAJOR_VERSION: u16 = 0;
static MINOR_VERSION: u16 = 1;
/// Represents inode number which is unique within a filesystem.
pub(crate) type InodeNumber = u64;
/// Represents device number which is unique for given block device.
pub(crate) type DeviceNumber = u64;
/// Convenience name for string that represents a path.
pub(crate) type PathString = String;
/// Represents unique file id across filesystems.
#[derive(Clone, Debug, Deserialize, Eq, Hash, Default, PartialEq, PartialOrd, Ord, Serialize)]
pub struct FileId(pub u64);
impl Display for FileId {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.0.fmt(f)
}
}
fn serialize_hashmap<S, K: Ord + Serialize + Clone, V: Serialize + Clone>(
value: &HashMap<K, V>,
serializer: S,
) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
let mut btree = BTreeMap::new();
for (k, v) in value {
btree.insert(k.clone(), v.clone());
}
btree.serialize(serializer)
}
#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)]
pub(crate) struct SerializableHashMap<
K: Ord + Serialize + Clone + Hash + PartialEq,
V: Serialize + Clone,
> {
#[serde(serialize_with = "serialize_hashmap")]
pub map: HashMap<K, V>,
}
impl<K, V> Deref for SerializableHashMap<K, V>
where
K: Ord + Serialize + Clone + Hash + PartialEq,
V: Serialize + Clone,
{
type Target = HashMap<K, V>;
fn deref(&self) -> &Self::Target {
&self.map
}
}
impl<K, V> DerefMut for SerializableHashMap<K, V>
where
K: Ord + Serialize + Clone + Hash + PartialEq,
V: Serialize + Clone,
{
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.map
}
}
/// The InodeInfo is unique per (device, inode) combination. It is
/// used to verify that we are prefetching the file for which we
/// generated the records.
/// `Record` refers to this information with a unique `FileId`.
#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)]
pub struct InodeInfo {
// Inode number of the file.
pub(crate) inode_number: InodeNumber,
// File size in bytes.
pub(crate) file_size: u64,
// Helps to get to a file from a Record. The field is used to get to the file
// that needs to be prefetched.
//
// This struct is built by getting data from trace lines and querying filesystem
// for other fields about the file/inode.
//
// One instance per file to be prefetched. A file/inode can have multiple paths.
// We store multiple paths so that we can still get to it if some of the
// paths get deleted.
//
// See comments for `Record`.
#[serde(deserialize_with = "check_inode_info_paths")]
pub(crate) paths: Vec<PathString>,
// Block device number on which the file is located.
pub(crate) device_number: DeviceNumber,
}
impl InodeInfo {
/// Returns InodeInfo.
pub fn new(
inode_number: InodeNumber,
file_size: u64,
paths: Vec<String>,
device_number: DeviceNumber,
) -> Self {
Self { inode_number, file_size, paths, device_number }
}
}
// Helps us check block alignment.
//
// A records file can have multiple FsInfos.
#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)]
pub struct FsInfo {
// This is filesystem block size and is not underlying device's block size
pub(crate) block_size: u64,
}
/// Prefetch record.
/// Each record translates to one filesystem `read()` request.
///
/// Tracer builds `Record` by parsing trace lines or by querying filesystem.
///
/// Multiple `Record`s can belong to a single InodeInfo. For example if there were two
/// reads for file `/data/my.apk` which is assigned FileId 10 at offsets 0 and 8k of length
/// 1 byte each then we will have two `Records` in `RecordsFile` that look like
/// `Record {file_id: 10, offset: 0, length: 1, timestamp: t1}`
/// `Record {file_id: 10, offset: 8192, length: 1, timestamp: t2}`
#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)]
pub struct Record {
/// Points to the file that should be fetched. `file_id` is unique per `InodeInfo`
/// in a `RecordsFile`
pub file_id: FileId,
/// start offset to fetch data from. This is FsInfo.block_size aligned.
pub offset: u64,
/// length of the read. This is generally rounded up to `FsInfo.block_size`,
/// except when the rounding up crosses `InodeInfo.file_size`
pub length: u64,
/// Timestamp in nanoseconds since the start when the data was loaded.
pub timestamp: u64,
}
impl Record {
/// Returns a new record if the two records belong to the same file and overlap.
fn overlaps(&self, other: &Self) -> Option<Self> {
if self.file_id == other.file_id {
let self_start = self.offset;
let self_end = self.offset + self.length;
let other_start = other.offset;
let other_end = other.offset + other.length;
if (self_start <= other_end) && (self_end >= other_start) {
let offset = min(self_start, other_start);
let length = max(self_end, other_end) - offset;
return Some(Self {
file_id: self.file_id.clone(),
offset,
length,
timestamp: min(self.timestamp, other.timestamp),
});
}
}
None
}
}
fn group_record_by_file_id(records: Vec<Record>) -> Vec<Record> {
let mut map: HashMap<FileId, BTreeMap<u64, Record>> = HashMap::new();
for record in &records {
let recs = map.entry(record.file_id.clone()).or_default();
recs.entry(record.offset).or_insert_with(|| record.clone());
}
let mut grouped = vec![];
for record in &records {
if let Some(inode) = map.get(&record.file_id) {
for rec in inode.values() {
grouped.push(rec.clone());
}
}
let _ = map.remove(&record.file_id);
}
grouped
}
/// When records are coalesced because their file ids match and their IO offsets overlap,
/// the least timestamp of the coalesced records is retained.
pub(crate) fn coalesce_records(records: Vec<Record>, group_by_file_id: bool) -> Vec<Record> {
let records = if group_by_file_id { group_record_by_file_id(records) } else { records };
let mut coalesced = vec![];
let mut current: Option<Record> = None;
for r in records {
current = match current {
None => Some(r),
Some(c) => {
let merged = c.overlaps(&r);
match merged {
None => {
coalesced.push(c);
Some(r)
}
Some(m) => Some(m),
}
}
}
}
if let Some(r) = current {
coalesced.push(r);
}
coalesced
}
// Records file header.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
pub struct Header {
/// magic number as uuid to identify the header/format.
#[serde(deserialize_with = "check_magic")]
magic: [u8; 16],
// major version number.
#[serde(deserialize_with = "check_major_number")]
major_number: u16,
// minor version number.
#[serde(deserialize_with = "check_minor_number")]
minor_number: u16,
/// timestamp when the records file was generated.
date: SystemTime,
/// Checksum of the `RecordsFile` with `digest` being empty vector.
digest: u32,
}
fn check_version_number<'de, D>(
deserializer: D,
expected: u16,
version_type: &str,
) -> Result<u16, D::Error>
where
D: Deserializer<'de>,
{
let found = u16::deserialize(deserializer)?;
if expected != found {
return Err(serde::de::Error::custom(format!(
"Failed to parse {} version. Expected: {} Found: {}",
version_type, expected, found
)));
}
Ok(found)
}
fn check_major_number<'de, D>(deserializer: D) -> Result<u16, D::Error>
where
D: Deserializer<'de>,
{
check_version_number(deserializer, MAJOR_VERSION, "major")
}
fn check_minor_number<'de, D>(deserializer: D) -> Result<u16, D::Error>
where
D: Deserializer<'de>,
{
check_version_number(deserializer, MINOR_VERSION, "minor")
}
fn check_magic<'de, D>(deserializer: D) -> Result<[u8; 16], D::Error>
where
D: Deserializer<'de>,
{
let found: [u8; 16] = <[u8; 16]>::deserialize(deserializer)?;
if found != MAGIC_UUID {
return Err(serde::de::Error::custom(format!(
"Failed to parse magic number. Expected: {:?} Found: {:?}",
MAGIC_UUID, found
)));
}
Ok(found)
}
fn check_inode_info_paths<'de, D>(deserializer: D) -> Result<Vec<PathString>, D::Error>
where
D: Deserializer<'de>,
{
let parsed: Vec<PathString> = Vec::deserialize(deserializer)?;
if parsed.is_empty() {
return Err(serde::de::Error::custom("No paths found for in InodeInfo"));
}
Ok(parsed)
}
// Helper inner struct of RecordsFile meant to verify checksum.
#[derive(Clone, Debug, Default, Deserialize, Serialize, Eq, PartialEq)]
pub(crate) struct RecordsFileInner {
// One instance per mounted block device.
pub(crate) filesystems: SerializableHashMap<DeviceNumber, FsInfo>,
/// Helps to get to a file path from a given `FileId`.
/// One instance per file to be prefetched.
pub(crate) inode_map: SerializableHashMap<FileId, InodeInfo>,
/// Helps to get to a file and offset to be replayed.
///
// The records are chronologically arranged, meaning the data that is
// needed first is at the beginning of the vector and the data that is
// needed last is at the end.
//
// One instance per part of the file that needs to be prefetched.
pub records: Vec<Record>,
}
/// Deserialized form of records file.
#[derive(Clone, Debug, Default, Deserialize, Serialize, Eq, PartialEq)]
#[serde(remote = "Self")]
pub struct RecordsFile {
/// Helps the prefetch tool to parse the rest of the file
pub header: Header,
/// Helps the prefetch tool to verify checksum.
pub(crate) inner: RecordsFileInner,
}
impl RecordsFile {
/// Given a file id, looks up the path of the file and returns an open File handle.
pub fn open_file(&self, id: FileId, exclude_files_regex: &[Regex]) -> Result<File, Error> {
if let Some(inode) = self.inner.inode_map.get(&id) {
let path = inode.paths.first().unwrap();
for regex in exclude_files_regex {
if regex.is_match(path) {
return Err(Error::SkipPrefetch { path: path.to_owned() });
}
}
debug!("Opening {} file {}", id.0, path);
OpenOptions::new()
.read(true)
.write(false)
.open(path)
.map_err(|source| Error::Open { source, path: path.to_owned() })
} else {
Err(Error::IdNoFound { id })
}
}
/// Inserts the given record into the RecordsFile.
pub fn insert_record(&mut self, record: Record) {
self.inner.records.push(record);
}
/// Inserts the given InodeInfo into the RecordsFile.
pub fn insert_or_update_inode_info(&mut self, id: FileId, info: InodeInfo) {
if let Some(inode) = self.inner.inode_map.get_mut(&id) {
if let Some(first_path) = info.paths.first() {
inode.paths.push(first_path.clone());
}
} else {
self.inner.inode_map.insert(id, info);
}
}
/// Verifies the integrity of the records file.
///
/// The check saves us from serializing an improperly built records file or replaying an
/// inconsistent `RecordsFile`.
///
/// Note: the check only works on the `RecordsFile` and doesn't access the filesystem. We limit
/// the scope so that we avoid issuing filesystem operations (directory lookup, stats) twice -
/// once during check and once during replay.
pub fn check(&self) -> Result<(), Error> {
let mut unique_files = HashSet::new();
let mut missing_file_ids = vec![];
for record in &self.inner.records {
if !self.inner.inode_map.contains_key(&record.file_id) {
missing_file_ids.push(record.file_id.clone());
}
unique_files.insert(record.file_id.clone());
}
let mut stale_inodes = vec![];
let mut missing_paths = vec![];
for (file_id, inode_info) in &self.inner.inode_map.map {
if inode_info.paths.is_empty() {
missing_paths.push(inode_info.clone());
}
if !unique_files.contains(file_id) {
stale_inodes.push(inode_info.clone());
}
}
if !stale_inodes.is_empty() || !missing_paths.is_empty() || !missing_file_ids.is_empty() {
return Err(Error::StaleInode { stale_inodes, missing_paths, missing_file_ids });
}
Ok(())
}
/// Builds InodeInfo from the given args and inserts it into the RecordsFile.
pub fn insert_or_update_inode(&mut self, id: FileId, stat: &Metadata, path: PathString) {
self.insert_or_update_inode_info(
id,
InodeInfo {
inode_number: stat.ino(),
file_size: stat.len(),
paths: vec![path],
device_number: stat.dev(),
},
)
}
/// Serializes records in CSV form.
pub fn serialize_records_to_csv(&self, writer: &mut dyn Write) -> Result<(), Error> {
let mut wtr = csv::Writer::from_writer(writer);
#[derive(Serialize)]
struct TempRecord<'a> {
timestamp: u64,
file: &'a PathString,
offset: u64,
length: u64,
file_size: u64,
}
for record in &self.inner.records {
if let Some(inode_info) = self.inner.inode_map.get(&record.file_id) {
let mut inode_info = inode_info.clone();
inode_info.paths.sort();
if let Some(first_path) = inode_info.paths.first().cloned() {
// Use the lexicographically smallest path for this row.
let record = TempRecord {
timestamp: record.timestamp,
file: &first_path,
offset: record.offset,
length: record.length,
file_size: inode_info.file_size,
};
wtr.serialize(&record)
.map_err(|e| Error::Serialize { error: e.to_string() })?;
}
}
}
wtr.flush()?;
Ok(())
}
fn compute_digest(&mut self) -> Result<u32, Error> {
self.header.digest = Default::default();
let serialized = serde_cbor::to_vec(self)
.map_err(|source| Error::Serialize { error: source.to_string() })?;
let mut hasher = Hasher::new();
hasher.update(&serialized);
Ok(hasher.finalize())
}
/// Convenience wrapper around serialize that adds checksum/digest to the file
/// to verify file consistency during replay/deserialize.
pub fn add_checksum_and_serialize(&mut self) -> Result<Vec<u8>, Error> {
self.header.digest = self.compute_digest()?;
serde_cbor::to_vec(self).map_err(|source| Error::Serialize { error: source.to_string() })
}
}
impl Default for Header {
fn default() -> Self {
Self {
major_number: MAJOR_VERSION,
minor_number: MINOR_VERSION,
date: SystemTime::now(),
digest: 0,
magic: MAGIC_UUID,
}
}
}
// Wrapper around deserialize to check any inconsistencies in the file format.
impl<'de> Deserialize<'de> for RecordsFile {
fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let rf = Self::deserialize(deserializer)?;
rf.check().map_err(|e| {
serde::de::Error::custom(format!("failed to validate records file: {}", e))
})?;
let mut zero_digest = rf.clone();
zero_digest.header.digest = 0;
let digest =
zero_digest.compute_digest().map_err(|e| serde::de::Error::custom(format!("{}", e)))?;
if digest != rf.header.digest {
return Err(serde::de::Error::custom(format!(
"file consistency check failed. Expected: {}. Found: {}",
digest, rf.header.digest
)));
}
Ok(rf)
}
}
// Wrapper around serialize to check any inconsistencies in the file format before serializing
impl Serialize for RecordsFile {
fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
where
S: Serializer,
{
self.check().map(|_| self).map_err(|e| {
serde::ser::Error::custom(format!("failed to validate records file: {}", e))
})?;
Self::serialize(self, serializer)
}
}
#[cfg(test)]
pub mod tests {
use std::assert_eq;
use super::*;
#[test]
fn test_major_version_mismatch() {
let mut rf = RecordsFile::default();
rf.header.major_number += 1;
let serialized: Result<RecordsFile, serde_cbor::Error> =
serde_cbor::from_slice(&serde_cbor::to_vec(&rf).unwrap());
assert_eq!(
serialized.unwrap_err().to_string(),
format!(
"Failed to parse major version. Expected: {} Found: {}",
MAJOR_VERSION,
MAJOR_VERSION + 1
)
);
}
#[test]
fn test_minor_version_mismatch() {
let mut rf = RecordsFile::default();
rf.header.minor_number += 1;
let serialized: Result<RecordsFile, serde_cbor::Error> =
serde_cbor::from_slice(&serde_cbor::to_vec(&rf).unwrap());
assert_eq!(
serialized.unwrap_err().to_string(),
format!(
"Failed to parse minor version. Expected: {} Found: {}",
MINOR_VERSION,
MINOR_VERSION + 1
)
);
}
#[test]
fn deserialize_inode_info_without_path() {
let inode = InodeInfo { inode_number: 1, file_size: 10, paths: vec![], device_number: 1 };
let serialized = serde_cbor::to_vec(&inode).unwrap();
let deserialized: Result<InodeInfo, serde_cbor::Error> =
serde_cbor::from_slice(&serialized);
assert_eq!(
deserialized.unwrap_err().to_string(),
"No paths found for in InodeInfo".to_owned()
);
}
#[test]
fn test_serialize_records_to_csv() {
let mut rf = RecordsFile::default();
let file_count = 4;
for i in 0..file_count {
rf.insert_or_update_inode_info(
FileId(i),
InodeInfo {
inode_number: i,
file_size: i * 10,
paths: vec![format!("/hello/{}", i)],
device_number: i + 10,
},
)
}
for i in 0..10 {
rf.insert_record(Record {
file_id: FileId(i % file_count),
offset: i * 3,
length: i + 4,
timestamp: i * file_count,
});
}
let mut buf = vec![];
rf.serialize_records_to_csv(&mut buf).unwrap();
let data = String::from_utf8(buf).unwrap();
assert_eq!(
data,
"timestamp,file,offset,length,file_size\n\
0,/hello/0,0,4,0\n\
4,/hello/1,3,5,10\n\
8,/hello/2,6,6,20\n\
12,/hello/3,9,7,30\n\
16,/hello/0,12,8,0\n\
20,/hello/1,15,9,10\n\
24,/hello/2,18,10,20\n\
28,/hello/3,21,11,30\n\
32,/hello/0,24,12,0\n\
36,/hello/1,27,13,10\n"
);
}
fn new_record(file: u64, offset: u64, length: u64, timestamp: u64) -> Record {
Record { file_id: FileId(file), offset, length, timestamp }
}
#[test]
fn test_coalesced_without_group() {
let non_coalescable_same_inode =
vec![new_record(1, 2, 3, 4), new_record(1, 6, 3, 5), new_record(1, 10, 3, 6)];
assert_eq!(
coalesce_records(non_coalescable_same_inode.clone(), false),
non_coalescable_same_inode
);
let non_coalescable_different_inode =
vec![new_record(1, 2, 3, 4), new_record(2, 5, 3, 5), new_record(3, 8, 3, 6)];
assert_eq!(
coalesce_records(non_coalescable_different_inode.clone(), false),
non_coalescable_different_inode
);
let some_coalesced =
vec![new_record(1, 2, 3, 4), new_record(1, 5, 3, 5), new_record(3, 8, 3, 6)];
assert_eq!(
coalesce_records(some_coalesced, false),
vec![new_record(1, 2, 6, 4), new_record(3, 8, 3, 6),]
);
let coalesced_into_one =
vec![new_record(1, 2, 3, 4), new_record(1, 5, 3, 5), new_record(1, 8, 3, 6)];
assert_eq!(coalesce_records(coalesced_into_one, false), vec![new_record(1, 2, 9, 4)]);
let no_grouping_or_coalescing =
vec![new_record(1, 2, 3, 4), new_record(3, 8, 3, 5), new_record(1, 5, 3, 6)];
assert_eq!(
coalesce_records(no_grouping_or_coalescing, false),
vec![new_record(1, 2, 3, 4), new_record(3, 8, 3, 5), new_record(1, 5, 3, 6),]
);
}
#[test]
fn test_coalesced_with_grouping() {
let non_coalescable_same_inode =
vec![new_record(1, 2, 3, 4), new_record(1, 6, 3, 5), new_record(1, 10, 3, 6)];
assert_eq!(
coalesce_records(non_coalescable_same_inode.clone(), true),
non_coalescable_same_inode
);
let non_coalescable_different_inode =
vec![new_record(1, 2, 3, 4), new_record(2, 5, 3, 5), new_record(3, 8, 3, 6)];
assert_eq!(
coalesce_records(non_coalescable_different_inode.clone(), true),
non_coalescable_different_inode
);
let some_coalesced =
vec![new_record(1, 2, 3, 4), new_record(1, 5, 3, 5), new_record(3, 8, 3, 6)];
assert_eq!(
coalesce_records(some_coalesced, true),
vec![new_record(1, 2, 6, 4), new_record(3, 8, 3, 6),]
);
let coalesced_into_one =
vec![new_record(1, 2, 3, 4), new_record(1, 5, 3, 5), new_record(1, 8, 3, 6)];
assert_eq!(coalesce_records(coalesced_into_one, true), vec![new_record(1, 2, 9, 4)]);
let some_grouped_coalesced =
vec![new_record(1, 2, 3, 4), new_record(3, 8, 3, 5), new_record(1, 5, 3, 6)];
assert_eq!(
coalesce_records(some_grouped_coalesced, true),
vec![new_record(1, 2, 6, 4), new_record(3, 8, 3, 5),]
);
}
#[test]
fn check_missing_records() {
let mut rf = RecordsFile::default();
rf.inner.inode_map.insert(
FileId(0),
InodeInfo {
inode_number: 0,
file_size: 1,
paths: vec!["hello".to_owned()],
device_number: 2,
},
);
rf.insert_record(Record { file_id: FileId(0), offset: 10, length: 20, timestamp: 30 });
rf.inner.inode_map.insert(
FileId(1),
InodeInfo {
inode_number: 1,
file_size: 2,
paths: vec!["world".to_owned()],
device_number: 3,
},
);
let e = rf.check().unwrap_err();
assert_eq!(
e.to_string(),
"Stale inode(s) info found.\n\
missing_file_ids: []\n\
stale_inodes: [\n \
InodeInfo {\n \
inode_number: 1,\n \
file_size: 2,\n \
paths: [\n \"world\",\n ],\n \
device_number: 3,\n },\n] \n\
missing_paths:[]"
);
}
#[test]
fn check_missing_file() {
let mut rf = RecordsFile::default();
rf.inner.inode_map.insert(
FileId(0),
InodeInfo {
inode_number: 0,
file_size: 1,
paths: vec!["hello".to_owned()],
device_number: 2,
},
);
rf.insert_record(Record { file_id: FileId(0), offset: 10, length: 20, timestamp: 30 });
rf.insert_record(Record { file_id: FileId(1), offset: 10, length: 20, timestamp: 30 });
let e = rf.check().unwrap_err();
assert_eq!(
e.to_string(),
"Stale inode(s) info found.\n\
missing_file_ids: [\n \
FileId(\n 1,\n ),\n]\n\
stale_inodes: [] \n\
missing_paths:[]"
);
}
#[test]
fn check_missing_paths() {
let mut rf = RecordsFile::default();
rf.inner.inode_map.insert(
FileId(0),
InodeInfo { inode_number: 0, file_size: 1, paths: vec![], device_number: 2 },
);
rf.insert_record(Record { file_id: FileId(0), offset: 10, length: 20, timestamp: 30 });
let e = rf.check().unwrap_err();
assert_eq!(
e.to_string(),
"Stale inode(s) info found.\n\
missing_file_ids: []\n\
stale_inodes: [] \n\
missing_paths:[\n \
InodeInfo {\n \
inode_number: 0,\n \
file_size: 1,\n \
paths: [],\n \
device_number: 2,\n },\n]"
);
}
}

View file

@ -0,0 +1,186 @@
// Copyright (C) 2024 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! A library to prefetch files on the file system to optimize startup times
//!
mod args;
mod error;
mod format;
mod replay;
mod tracer;
use std::fs::File;
use std::fs::OpenOptions;
use std::io;
use std::io::Write;
use std::os::unix::fs::PermissionsExt;
use std::string::ToString;
use std::thread;
use std::time::Duration;
#[cfg(target_os = "android")]
use log::Level;
#[cfg(target_os = "linux")]
use log::LevelFilter;
pub use args::args_from_env;
use args::OutputFormat;
pub use args::ReplayArgs;
pub use args::{DumpArgs, MainArgs, RecordArgs, SubCommands};
pub use error::Error;
pub use format::FileId;
pub use format::InodeInfo;
pub use format::Record;
pub use format::RecordsFile;
use log::info;
#[cfg(target_os = "android")]
use log::warn;
pub use replay::Replay;
pub use tracer::nanoseconds_since_boot;
#[cfg(target_os = "android")]
use rustutils::system_properties;
#[cfg(target_os = "android")]
use rustutils::system_properties::error::PropertyWatcherError;
#[cfg(target_os = "android")]
use rustutils::system_properties::PropertyWatcher;
#[cfg(target_os = "android")]
fn wait_for_property_true(property_name: &str) -> Result<(), PropertyWatcherError> {
let mut prop = PropertyWatcher::new(property_name)?;
loop {
prop.wait(None)?;
if system_properties::read_bool(property_name, false)? {
break;
}
}
Ok(())
}
/// Records prefetch data for the given configuration
pub fn record(args: &RecordArgs) -> Result<(), Error> {
let (mut tracer, exit_tx) = tracer::Tracer::create(
args.trace_buffer_size_kib,
args.tracing_subsystem.clone(),
args.tracing_instance.clone(),
args.setup_tracing,
)?;
let duration = Duration::from_secs(args.duration as u64);
let thd = thread::spawn(move || {
if !duration.is_zero() {
info!("Record start - waiting for duration: {:?}", duration);
thread::sleep(duration);
} else {
#[cfg(target_os = "android")]
wait_for_property_true("sys.boot_completed").unwrap_or_else(|e| {
warn!("failed to wait for sys.boot_completed with error: {}", e)
});
}
        // We want to unwrap here on failure to send this signal. Otherwise the
        // tracer will continue generating a huge amount of records data.
exit_tx.send(()).unwrap();
});
let mut rf = tracer.trace(args.int_path.as_ref())?;
thd.join()
.map_err(|_| Error::ThreadPool { error: "Failed to join timeout thread".to_string() })?;
let mut out_file =
OpenOptions::new().write(true).create(true).truncate(true).open(&args.path).map_err(
|source| Error::Create { source, path: args.path.to_str().unwrap().to_owned() },
)?;
std::fs::set_permissions(&args.path, std::fs::Permissions::from_mode(0o644))
.map_err(|source| Error::Create { source, path: args.path.to_str().unwrap().to_owned() })?;
out_file
.write_all(&rf.add_checksum_and_serialize()?)
.map_err(|source| Error::Write { path: args.path.to_str().unwrap().to_owned(), source })?;
Ok(())
}
/// Replays prefetch data for the given configuration
pub fn replay(args: &ReplayArgs) -> Result<(), Error> {
let replay = Replay::new(args)?;
replay.replay()
}
/// Dumps prefetch data in a human-readable form
pub fn dump(args: &DumpArgs) -> Result<(), Error> {
let reader = File::open(&args.path)
.map_err(|source| Error::Open { source, path: args.path.to_str().unwrap().to_string() })?;
let rf: RecordsFile =
serde_cbor::from_reader(reader).map_err(|e| Error::Deserialize { error: e.to_string() })?;
match args.format {
OutputFormat::Json => println!(
"{:#}",
serde_json::to_string_pretty(&rf)
.map_err(|e| Error::Serialize { error: e.to_string() })?
),
OutputFormat::Csv => rf.serialize_records_to_csv(&mut io::stdout())?,
}
Ok(())
}
/// An alias of log::Level, used to select the log level across Android and Linux.
#[cfg(target_os = "android")]
pub type LogLevel = Level;
/// An alias of log::LevelFilter, used to select the log level across Android and Linux.
#[cfg(not(target_os = "android"))]
pub type LogLevel = LevelFilter;
/// Convenience logging initializer that is shared between the prefetch tool and the C wrapper library
#[cfg(target_os = "android")]
pub fn init_logging(_level: LogLevel) {
android_logger::init_once(
android_logger::Config::default().with_max_level(log::LevelFilter::Info).format(
|f, record| {
write!(
f,
"{} prefetch_rs: {}:{} {}: {}",
nanoseconds_since_boot(),
record.file().unwrap_or("unknown_file"),
record.line().unwrap_or(0),
record.level(),
record.args()
)
},
),
)
}
/// Convenience logging initializer that is shared between the prefetch tool and the C wrapper library
#[cfg(target_os = "linux")]
pub fn init_logging(level: LogLevel) {
let mut builder = env_logger::Builder::from_default_env();
builder
.filter(None, level)
.format(|buf, record| {
writeln!(
buf,
"{} prefetch_rs: {}:{} {}: {}",
nanoseconds_since_boot(),
record.file().unwrap_or("unknown_file"),
record.line().unwrap_or(0),
record.level(),
record.args()
)
})
.init();
}

View file

@ -0,0 +1,41 @@
// Copyright (C) 2024 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! A utility wrapper around libprefetch that allows recording, replaying and
//! dumping prefetch data.
use log::error;
use prefetch_rs::args_from_env;
use prefetch_rs::dump;
use prefetch_rs::init_logging;
use prefetch_rs::record;
use prefetch_rs::replay;
use prefetch_rs::LogLevel;
use prefetch_rs::MainArgs;
use prefetch_rs::SubCommands;
fn main() {
init_logging(LogLevel::Debug);
let args: MainArgs = args_from_env();
let ret = match &args.nested {
SubCommands::Record(args) => record(args),
SubCommands::Replay(args) => replay(args),
SubCommands::Dump(args) => dump(args),
};
if let Err(err) = ret {
error!("{:?} command failed: {:?}", args, err);
}
}

View file

@ -0,0 +1,762 @@
// Copyright (C) 2024 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::clone::Clone;
use std::convert::TryInto;
use std::fmt::Display;
use std::mem::replace;
use std::os::unix::io::AsRawFd;
use std::sync::Arc;
use std::sync::Mutex;
use std::sync::RwLock;
use std::thread;
use log::debug;
use log::error;
use log::warn;
use lru_cache::LruCache;
use nix::errno::Errno;
use nix::fcntl::posix_fadvise;
use regex::Regex;
use crate::args::ConfigFile;
use crate::format::Record;
use crate::format::{FileId, RecordsFile};
use crate::Error;
use crate::ReplayArgs;
use libc::{c_void, off64_t, pread64};
use std::fs::File;
const READ_SZ: usize = 1024 * 1024;
struct ScopedLog<T: Display + Sized> {
msg: T,
thd_id: usize,
}
fn scoped_log<T: Display + Sized>(ctx: usize, msg: T) -> ScopedLog<T> {
let thd_id = ctx;
debug!("{} {} start", thd_id, msg);
ScopedLog { msg, thd_id }
}
impl<T: Display> Drop for ScopedLog<T> {
fn drop(&mut self) {
debug!("{} {} end", self.thd_id, self.msg);
}
}
fn readahead(
id: usize,
file: Arc<File>,
record: &Record,
buffer: &mut [u8; READ_SZ],
) -> Result<(), Error> {
debug!("readahead {:?}", record);
let _dbg = scoped_log(id, "readahead");
let mut current_offset: off64_t = record
.offset
.try_into()
.map_err(|_| Error::Read { error: "Failed to convert offset".to_string() })?;
let mut remaining_data: usize = record
.length
.try_into()
.map_err(|_| Error::Read { error: "Failed to convert length".to_string() })?;
while remaining_data > 0 {
let read_size = std::cmp::min(READ_SZ, remaining_data);
        // SAFETY: This is safe because
        // - the file is known to exist and is open
        // - the buffer is allocated upfront and its validity is guaranteed because it
        //   comes from a mutable slice reference.
        // - read_size is guaranteed not to exceed the length of the buffer.
let bytes_read = unsafe {
pread64(file.as_raw_fd(), buffer.as_mut_ptr() as *mut c_void, read_size, current_offset)
};
if bytes_read == -1 {
return Err(Error::Read { error: format!("readahead failed: {}", Errno::last_raw()) });
}
if bytes_read == 0 {
break; // End of file reached
}
current_offset += bytes_read as off64_t;
remaining_data -= bytes_read as usize;
}
// TODO: Try readahead() syscall or async I/O
Ok(())
}
fn worker_internal(
id: usize,
state: Arc<Mutex<SharedState>>,
records_file: Arc<RwLock<RecordsFile>>,
exit_on_error: bool,
exclude_files_regex: Vec<Regex>,
buffer: &mut [u8],
) -> Result<(), Error> {
loop {
let index = {
let mut state = state.lock().unwrap();
if state.result.is_err() {
return Ok(());
}
state.next_record()
};
let record = {
let rf = records_file.read().unwrap();
if index >= rf.inner.records.len() {
return Ok(());
}
rf.inner.records.get(index).unwrap().clone()
};
let _dbg = scoped_log(id, "record_replay");
let file = state.lock().unwrap().fds.get_mut(&record.file_id).map(|f| f.clone());
let file = match file {
Some(file) => file,
None => {
let file = Arc::new({
let file = records_file
.read()
.unwrap()
.open_file(record.file_id.clone(), &exclude_files_regex);
if let Err(e) = file {
if exit_on_error {
return Err(e);
} else {
match e {
Error::SkipPrefetch { path } => {
debug!("Skipping file during replay: {}", path);
}
_ => error!(
"Failed to open file id: {} with {}",
record.file_id.clone(),
e.to_string()
),
}
continue;
}
}
let file = file.unwrap();
                    // We do not want the filesystem to be intelligent and prefetch more than
                    // what this code is reading. So turn off prefetch.
if let Err(e) = posix_fadvise(
file.as_raw_fd(),
0,
0,
nix::fcntl::PosixFadviseAdvice::POSIX_FADV_RANDOM,
) {
warn!(
"Failed to turn off filesystem read ahead for file id: {} with {}",
record.file_id.clone(),
e.to_string()
);
}
file
});
let cache_file = file.clone();
state.lock().unwrap().fds.insert(record.file_id.clone(), cache_file);
file
}
};
if let Err(e) = readahead(id, file, &record, buffer.try_into().unwrap()) {
if exit_on_error {
return Err(e);
} else {
error!(
"readahead failed on file id: {} with: {}",
record.file_id.clone(),
e.to_string()
);
continue;
}
}
}
}
fn worker(
id: usize,
state: Arc<Mutex<SharedState>>,
records_file: Arc<RwLock<RecordsFile>>,
exit_on_error: bool,
exclude_files_regex: Vec<Regex>,
buffer: &mut [u8],
) {
let _dbg = scoped_log(id, "read_loop");
let result = worker_internal(
id,
state.clone(),
records_file,
exit_on_error,
exclude_files_regex,
buffer,
);
if result.is_err() {
error!("worker failed with {:?}", result);
let mut state = state.lock().unwrap();
if state.result.is_ok() {
state.result = result;
}
}
}
#[derive(Debug)]
pub struct SharedState {
fds: LruCache<FileId, Arc<File>>,
records_index: usize,
result: Result<(), Error>,
}
impl SharedState {
fn next_record(&mut self) -> usize {
let ret = self.records_index;
self.records_index += 1;
ret
}
}
/// Runtime, in-memory representation of a records file and the state needed to replay it.
#[derive(Debug)]
pub struct Replay {
records_file: Arc<RwLock<RecordsFile>>,
io_depth: u16,
exit_on_error: bool,
state: Arc<Mutex<SharedState>>,
exclude_files_regex: Vec<Regex>,
}
impl Replay {
/// Creates Replay from input `args`.
pub fn new(args: &ReplayArgs) -> Result<Self, Error> {
let _dbg = scoped_log(1, "new");
let reader: File = File::open(&args.path).map_err(|source| Error::Open {
source,
path: args.path.to_str().unwrap().to_owned(),
})?;
let rf: RecordsFile = serde_cbor::from_reader(reader)
.map_err(|error| Error::Deserialize { error: error.to_string() })?;
let mut exclude_files_regex: Vec<Regex> = Vec::new();
// The path to the configuration file is optional in the command.
// If the path is provided, the configuration file will be read.
if !&args.config_path.as_os_str().is_empty() {
let config_reader = File::open(&args.config_path).map_err(|source| Error::Open {
source,
                path: args.config_path.to_str().unwrap().to_owned(),
})?;
let cf: ConfigFile = serde_json::from_reader(config_reader)
.map_err(|error| Error::Deserialize { error: error.to_string() })?;
for file_to_exclude in &cf.files_to_exclude_regex {
exclude_files_regex.push(Regex::new(file_to_exclude).unwrap());
}
}
Ok(Self {
records_file: Arc::new(RwLock::new(rf)),
io_depth: args.io_depth,
exit_on_error: args.exit_on_error,
state: Arc::new(Mutex::new(SharedState {
fds: LruCache::new(args.max_fds.into()),
records_index: 0,
result: Ok(()),
})),
exclude_files_regex,
})
}
/// Replay records.
pub fn replay(self) -> Result<(), Error> {
let _dbg = scoped_log(1, "replay");
let mut threads = vec![];
for i in 0..self.io_depth {
let i_clone = i as usize;
let state = self.state.clone();
let records_file = self.records_file.clone();
let exit_on_error = self.exit_on_error;
let exclude_files_regex = self.exclude_files_regex.clone();
let mut buffer = Box::new([0u8; READ_SZ]);
threads.push(thread::Builder::new().spawn(move || {
worker(
i_clone,
state,
records_file,
exit_on_error,
exclude_files_regex,
buffer.as_mut_slice(),
)
}));
}
for thread in threads {
thread.unwrap().join().unwrap();
}
replace(&mut self.state.lock().unwrap().result, Ok(()))
}
}
// WARNING: flaky tests.
// In these tests we create files, invalidate their caches and then replay.
// Verify that after replay the same portions of data are in memory.
//
// Since these tests rely on the presence or absence of data in the cache, the
// files used by the tests should not be on a tmpfs filesystem. So we use a
// relative path as the target directory. There is no guarantee that this
// target directory is not on tmpfs, but the chances are better than with a
// target directory known to be on tmpfs.
//
// Tests can be flaky if the system under test is running low on memory. The
// tests create files using O_DIRECT so that no data is left in the file cache.
// Though this is sufficient to avoid caching, other processes reading these
// files (like antivirus) or some other system processes might change the state
// of the cache. Or it may happen that the filesystem evicts the file before
// we verify that readahead worked as intended.
#[cfg(test)]
pub mod tests {
use std::{
assert,
io::Write,
ops::Range,
path::{Path, PathBuf},
time::Duration,
};
use crate::format::DeviceNumber;
use crate::format::FsInfo;
use crate::format::InodeNumber;
use crate::nanoseconds_since_boot;
use nix::sys::mman::MapFlags;
use nix::sys::mman::ProtFlags;
use serde::Deserialize;
use serde::Serialize;
use std::collections::HashMap;
use std::fs::OpenOptions;
use std::num::NonZeroUsize;
use std::os::fd::AsFd;
use std::os::unix::fs::symlink;
use std::os::unix::fs::MetadataExt;
use std::ptr::NonNull;
use tempfile::NamedTempFile;
use super::*;
use crate::tracer::{
page_size,
tests::{copy_uncached_files_and_record_from, setup_test_dir},
};
static MB: u64 = 1024 * 1024;
static KB: u64 = 1024;
fn random_write(file: &mut NamedTempFile, base: u64) -> Range<u64> {
let start: u64 = base + (rand::random::<u64>() % (base / 2)) as u64;
let len: u64 = rand::random::<u64>() % (32 * KB);
let buf = vec![5; len as usize];
nix::sys::uio::pwrite(file.as_fd(), &buf, start as i64).unwrap();
start..(start + len)
}
pub(crate) fn create_file(
path: Option<&Path>,
align: Option<u64>,
) -> (NamedTempFile, Vec<Range<u64>>) {
let mut file = if let Some(path) = path {
NamedTempFile::new_in(path).unwrap()
} else {
NamedTempFile::new().unwrap()
};
let range1 = random_write(&mut file, 32 * KB);
let range2 = random_write(&mut file, 128 * KB);
let range3 = random_write(&mut file, 4 * MB);
if let Some(align) = align {
let orig_size = file.metadata().unwrap().len();
let aligned_size = orig_size + (align - (orig_size % align));
file.set_len(aligned_size).unwrap();
}
(file, vec![range1, range2, range3])
}
pub(crate) fn generate_cached_files_and_record(
path: Option<&Path>,
create_symlink: bool,
align: Option<u64>,
) -> (RecordsFile, Vec<(NamedTempFile, Vec<Range<u64>>)>) {
let file1 = create_file(path, align);
let file2 = create_file(path, align);
let file3 = create_file(path, align);
let mut f: RecordsFileBuilder = Default::default();
f.add_file(file1.0.path().to_str().unwrap());
f.add_file(file2.0.path().to_str().unwrap());
f.add_file(file3.0.path().to_str().unwrap());
if create_symlink {
let symlink_path = format!("{}-symlink", file1.0.path().to_str().unwrap());
symlink(file1.0.path().file_name().unwrap(), &symlink_path).unwrap();
f.add_file(&symlink_path);
}
let rf = f.build().unwrap();
(rf, vec![file1, file2, file3])
}
    /// RecordsFileBuilder is primarily used for testing purposes. It is
    /// a thin wrapper around "Record" that gives us the ability
    /// to test Records functionality. The flow of this test is as follows:
    ///
    /// 1: generate_cached_files_and_record -> This will create temporary files of different
    /// lengths and build the "RecordsFile" format.
    /// 2: For each of the file paths created, "Record"s are generated as follows:
    /// a: mmap the file based on its length.
    /// b: call mincore() to get the residency of pages in memory for the given
    /// length.
    /// c: Iterate over the buffer of pages returned by mincore(). Whenever a run of
    /// resident pages ends, construct a "Record" structure for that run.
    /// 3: The build() function will finally return a constructed prefetch RecordsFile which
    /// contains all the "Record" structures required for "Replay".
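    ///
    /// A minimal usage sketch (illustrative only; the file path below is hypothetical):
    ///
    /// ```ignore
    /// let mut builder = RecordsFileBuilder::default();
    /// builder.add_file("/tmp/some-cached-file");
    /// let records_file = builder.build()?;
    /// ```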
#[derive(Debug, Default, Deserialize, Serialize)]
pub struct RecordsFileBuilder {
// Temporarily holds paths of all files opened by other processes.
pub(crate) paths: HashMap<String, FileId>,
// Read inode numbers
inode_numbers: HashMap<(DeviceNumber, InodeNumber), FileId>,
}
impl RecordsFileBuilder {
pub fn add_file(&mut self, path: &str) {
if self.paths.contains_key(path) {
return;
}
self.paths.insert(path.to_owned(), FileId(self.paths.len() as u64));
}
pub fn build(&mut self) -> Result<RecordsFile, Error> {
let mut rf = RecordsFile::default();
for (path, mut id) in self.paths.drain() {
let stat = Path::new(&path)
.metadata()
.map_err(|source| Error::Stat { source, path: path.clone() })?;
rf.inner
.filesystems
.entry(stat.dev())
.or_insert(FsInfo { block_size: stat.blksize() });
if let Some(orig_id) = self.inode_numbers.get(&(stat.dev(), stat.ino())) {
let inode = rf.inner.inode_map.get_mut(orig_id).unwrap();
inode.paths.push(path.clone());
                // There may be multiple paths for the file, so from those paths we may have
                // multiple ids. Override the id.
id = orig_id.clone();
} else {
self.inode_numbers.insert((stat.dev(), stat.ino()), id.clone());
rf.insert_or_update_inode(id.clone(), &stat, path.clone());
}
if let Some(mmap) = Mmap::create(&path, id)? {
mmap.get_records(&mut rf.inner.records)?;
}
}
Ok(rf)
}
}
#[derive(Debug)]
pub(crate) struct Mmap {
map_addr: *mut c_void,
length: usize,
#[allow(dead_code)]
file: File,
file_id: FileId,
}
impl Mmap {
pub fn create(path: &str, file_id: FileId) -> Result<Option<Self>, Error> {
let file = OpenOptions::new()
.read(true)
.write(false)
.open(path)
.map_err(|source| Error::Open { source, path: path.to_owned() })?;
let length = file
.metadata()
.map_err(|source| Error::Stat { source, path: path.to_owned() })?
.len() as usize;
if length == 0 {
return Ok(None);
}
// SAFETY: This is safe because
// - the length is checked for zero
// - offset is set to 0
let map_addr = unsafe {
nix::sys::mman::mmap(
None,
NonZeroUsize::new(length).unwrap(),
ProtFlags::PROT_READ,
MapFlags::MAP_SHARED,
file.as_fd(),
0,
)
.map_err(|source| Error::Mmap {
error: source.to_string(),
path: path.to_owned(),
})?
};
Ok(Some(Self { map_addr: map_addr.as_ptr(), length, file, file_id }))
}
/// Construct the "Record" file based on pages resident in RAM.
pub(crate) fn get_records(&self, records: &mut Vec<Record>) -> Result<(), Error> {
let page_size = page_size()?;
let page_count = (self.length + page_size - 1) / page_size;
let mut buf: Vec<u8> = vec![0_u8; page_count];
// SAFETY: This is safe because
// - the file is mapped
// - buf points to a valid and sufficiently large memory region with the
// requirement of (length+PAGE_SIZE-1) / PAGE_SIZE bytes
let ret = unsafe { libc::mincore(self.map_addr, self.length, buf.as_mut_ptr()) };
if ret < 0 {
return Err(Error::Custom {
error: format!("failed to query resident pages: {}", Errno::last_raw()),
});
}
let mut i = 0;
let mut offset_length: Option<(u64, u64)> = None;
for (index, resident) in buf.iter().enumerate() {
if *resident != 0 {
if let Some((_, length)) = &mut offset_length {
*length += page_size as u64;
} else {
offset_length = Some((index as u64 * page_size as u64, page_size as u64));
}
} else if let Some((offset, length)) = offset_length {
i += 1;
records.push(Record {
file_id: self.file_id.clone(),
offset,
length,
timestamp: nanoseconds_since_boot(),
});
offset_length = None;
}
}
if let Some((offset, length)) = offset_length {
i += 1;
records.push(Record {
file_id: self.file_id.clone(),
offset,
length,
timestamp: nanoseconds_since_boot(),
});
}
debug!("records found: {} for {:?}", i, self);
Ok(())
}
}
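    // For example (a sketch of get_records() above): with a 4 KiB page size, a
    // mincore() residency buffer of [1, 1, 0, 1] would yield two records: one at
    // offset 0 with length 8192, and one at offset 12288 with length 4096.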
impl Drop for Mmap {
fn drop(&mut self) {
// SAFETY: This is safe because
// - addr is mapped and is multiple of page_size
let ret = unsafe {
nix::sys::mman::munmap(NonNull::new(self.map_addr).unwrap(), self.length)
};
if let Err(e) = ret {
error!(
"failed to munmap {:p} {} with {}",
self.map_addr,
self.length,
e.to_string()
);
}
}
}
// Please see comment above RecordsFileBuilder.
fn rebuild_records_file(files: &[(PathBuf, Vec<Range<u64>>)]) -> RecordsFile {
// Validate that caches are dropped
let mut f: RecordsFileBuilder = Default::default();
for (path, _) in files {
f.add_file(path.to_str().unwrap());
}
f.build().unwrap()
}
fn ensure_files_not_cached(files: &mut [(PathBuf, Vec<Range<u64>>)]) {
assert!(rebuild_records_file(files).inner.records.is_empty());
}
fn has_record(records: &[Record], key: &Record) -> bool {
for r in records {
if r.offset == key.offset && r.length == key.length {
return true;
}
}
false
}
fn compare_records(old: &[Record], new: &[Record]) {
for key in new {
if !has_record(old, key) {
panic!("Failed to file {:?} in {:?}", key, old);
}
}
}
fn create_test_config_file(files_to_exclude_regex: Vec<String>) -> String {
let cfg = ConfigFile { files_to_exclude_regex, ..Default::default() };
serde_json::to_string(&cfg).unwrap()
}
// TODO: Split this into individual tests for better readability.
// b/378554334
fn test_replay_internal(
create_symlink: bool,
exit_on_error: bool,
inject_error: bool,
exclude_all_files: bool,
empty_exclude_file_list: bool,
) {
let page_size = page_size().unwrap() as u64;
let test_base_dir = setup_test_dir();
let (rf, mut files) =
generate_cached_files_and_record(None, create_symlink, Some(page_size));
// Here "uncached_files" emulate the files after reboot when none of those files data is in cache.
let (mut uncached_rf, mut uncached_files) =
copy_uncached_files_and_record_from(Path::new(&test_base_dir), &mut files, &rf);
// Injects error(s) in the form of invalid filename
if inject_error {
if let Some(v) = uncached_rf.inner.inode_map.values_mut().next() {
for path in &mut v.paths {
path.push('-');
}
}
}
let mut file = NamedTempFile::new().unwrap();
file.write_all(&uncached_rf.add_checksum_and_serialize().unwrap()).unwrap();
let mut config_file = NamedTempFile::new().unwrap();
let mut files_to_exclude: Vec<String> = Vec::new();
if exclude_all_files {
// Exclude files from replay by adding them in config
for v in uncached_rf.inner.inode_map.values_mut() {
for path in &mut v.paths {
files_to_exclude.push(path.to_string())
}
}
} else if empty_exclude_file_list {
files_to_exclude.extend(vec![]);
} else {
// Exclude file1 and file2 during replay
files_to_exclude.extend(vec!["file1".to_owned(), "file2".to_owned()]);
}
// Create a config json to exclude files during replay
let config_file_contents = create_test_config_file(files_to_exclude);
config_file.write_all(config_file_contents.as_bytes()).unwrap();
ensure_files_not_cached(&mut uncached_files);
let replay = Replay::new(&ReplayArgs {
path: file.path().to_owned(),
io_depth: 32,
max_fds: 128,
exit_on_error,
config_path: config_file.path().to_owned(),
})
.unwrap();
let result = replay.replay();
// Sleep a bit so that readaheads are complete.
thread::sleep(Duration::from_secs(1));
if exit_on_error && inject_error {
result.expect_err("Failure was expected");
} else if exclude_all_files {
let new_rf = rebuild_records_file(&uncached_files);
assert!(new_rf.inner.records.is_empty());
} else {
result.unwrap();
            // At this point, we have prefetched data for the uncached files, bringing the
            // same set of data into memory as the original cached files.
            // If we record prefetch data for the new files, we should get the same records
            // (offsets and lengths) except that the file names should be different.
            // This block verifies it.
            // Note: `new_rf` is for uncached_files. But, [un]fortunately, those "uncached_files"
            // are now cached after we replayed the records.
let new_rf = rebuild_records_file(&uncached_files);
assert!(!new_rf.inner.records.is_empty());
assert_eq!(rf.inner.inode_map.len(), new_rf.inner.inode_map.len());
assert_eq!(rf.inner.records.len(), new_rf.inner.records.len());
compare_records(&rf.inner.records, &new_rf.inner.records);
}
}
#[test]
fn test_replay() {
test_replay_internal(true, false, false, false, false);
}
#[test]
fn test_replay_strict() {
test_replay_internal(true, true, false, false, false);
}
#[test]
fn test_replay_no_symlink() {
test_replay_internal(false, false, false, false, false);
}
#[test]
fn test_replay_no_symlink_strict() {
test_replay_internal(false, true, false, false, false);
}
#[test]
fn test_replay_fails_on_error() {
test_replay_internal(true, true, true, false, false);
}
#[test]
fn test_replay_exclude_all_files() {
test_replay_internal(true, false, false, true, false);
}
#[test]
fn test_replay_empty_exclude_files_list() {
test_replay_internal(true, false, false, false, true);
}
}

View file

@ -0,0 +1,897 @@
// Copyright (C) 2024 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! See top level documentation for `crate::tracer`.
use std::collections::hash_map::Iter;
use std::fs::symlink_metadata;
use std::io::{ErrorKind, Write};
use std::iter::Iterator;
use std::mem::take;
use std::os::unix::fs::MetadataExt;
use std::{
collections::{HashMap, HashSet},
fs::read_to_string,
option::Option,
path::{Path, PathBuf},
};
use log::{debug, error, info, warn};
use regex::Regex;
use serde::Deserialize;
use serde::Serialize;
use walkdir::{DirEntry, WalkDir};
use crate::format::{coalesce_records, FsInfo};
use crate::tracer::{page_size, TracerConfigs};
use crate::{
format::{DeviceNumber, InodeNumber},
tracer::{TraceSubsystem, EXCLUDE_PATHS},
Error, FileId, Record, RecordsFile,
};
static MOUNTINFO_PATH: &str = "/proc/self/mountinfo";
// Trace events to enable
// Paths are relative to trace mount point
static TRACE_EVENTS: &[&str] =
&["events/filemap/mm_filemap_add_to_page_cache/enable", "tracing_on"];
// Filesystem types to ignore
static EXCLUDED_FILESYSTEM_TYPES: &[&str] = &[
"binder",
"bpf",
"cgroup",
"cgroup2",
"configfs",
"devpts",
"fuse", // No emulated storage
"fusectl",
"proc",
"pstore",
"selinuxfs",
"sysfs",
"tmpfs", // Check for apex mount points
"tracefs",
"functionfs", // adb, fastboot
"f2fs", // Skip /data mounts
];
#[cfg(target_os = "linux")]
type MajorMinorType = u32;
#[cfg(target_os = "android")]
type MajorMinorType = i32;
// TODO(b/302056482): Once we uprev nix crate, we can use the function exported by the crate.
fn major(dev: DeviceNumber) -> MajorMinorType {
(((dev >> 32) & 0xffff_f000) | ((dev >> 8) & 0x0000_0fff)) as MajorMinorType
}
// TODO(b/302056482): Once we uprev nix crate, we can use the function exported by the crate.
fn minor(dev: DeviceNumber) -> MajorMinorType {
(((dev >> 12) & 0xffff_ff00) | ((dev) & 0x0000_00ff)) as MajorMinorType
}
// TODO(b/302056482): Once we uprev nix crate, we can use the function exported by the crate.
fn makedev(major: MajorMinorType, minor: MajorMinorType) -> DeviceNumber {
let major = major as DeviceNumber;
let minor = minor as DeviceNumber;
((major & 0xffff_f000) << 32)
| ((major & 0x0000_0fff) << 8)
| ((minor & 0xffff_ff00) << 12)
| (minor & 0x0000_00ff)
}
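// A quick sanity check of the encoding above (a sketch, not compiled in): the
// conversions round-trip, e.g. for an arbitrary device 254:6,
//
//   assert_eq!(major(makedev(254, 6)), 254);
//   assert_eq!(minor(makedev(254, 6)), 6);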
fn build_device_number(major: &str, minor: &str) -> Result<DeviceNumber, Error> {
Ok(makedev(
major.parse::<MajorMinorType>().map_err(|e| Error::Custom {
error: format!("Failed to parse major number from {} with {}", major, e),
})?,
        minor.parse::<MajorMinorType>().map_err(|e| Error::Custom {
            error: format!("Failed to parse minor number from {} with {}", minor, e),
        })?,
))
}
// Returns timestamp in nanoseconds
fn build_timestamp(seconds: &str, microseconds: &str) -> Result<u64, Error> {
let seconds = seconds.parse::<u64>().map_err(|e| Error::Custom {
error: format!("Failed to parse seconds from {} with {}", seconds, e),
})?;
    let microseconds = microseconds.parse::<u64>().map_err(|e| Error::Custom {
        error: format!("Failed to parse microseconds from {} with {}", microseconds, e),
    })?;
Ok((seconds * 1_000_000_000) + (microseconds * 1_000))
}
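// For example (a sketch): a trace line stamped "485.276990" yields the pair
// ("485", "276990"), i.e. 485 * 1_000_000_000 + 276_990 * 1_000 =
// 485_276_990_000 ns, matching the timestamps in the tests below.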
#[cfg(not(target_os = "android"))]
fn is_highly_privileged_path(_path: &Path) -> bool {
false
}
#[cfg(target_os = "android")]
fn is_highly_privileged_path(path: &Path) -> bool {
    // The following directories contain a mix of files with and without access to stat/read.
    // We do not completely exclude these directories as there are still a lot of
    // files we can issue readahead on. Some of the files on which readahead fails include
// - /system/bin/run-as
// - /data/data/com.android.storagemanager
// - /system/apex/com.android.art/bin/dex2oat32
// - /data/user/0/com.android.systemui
//
// - TODO: /system/apex: Apex files in read-only partition may be read during boot.
// However, some files may not have access. Double check the record files
// to filter out the exact path.
let privileged_paths = [
"/data/data",
"/data/user/0",
"/data/user_de/0",
"/system/bin/",
"/system/etc/selinux/",
"/system/system_ext/etc/selinux/",
"/system/product/etc/selinux/",
"/system/vendor/etc/selinux/",
"/system_ext/etc/selinux/",
"/product/etc/selinux/",
"/vendor/etc/selinux/",
"/system/xbin",
"/system/etc/",
"/data/",
"/postinstall/",
"/mnt/",
"/metadata/",
];
for privileged in privileged_paths {
if path.to_str().unwrap().starts_with(privileged) {
return true;
}
}
false
}
enum DeviceState {
Include((DeviceNumber, PathBuf)),
Exclude(DeviceNumber),
}
/// Utility struct that helps to include and exclude devices and mount points that need and don't
/// need prefetching.
#[derive(Debug, Deserialize, Serialize)]
struct MountInfo {
// Map of device number to mount points
included_devices: HashMap<DeviceNumber, PathBuf>,
    // Devices that we don't want to prefetch - like devices backing tmpfs and sysfs
excluded_devices: HashSet<DeviceNumber>,
}
impl MountInfo {
    // Parses the file at `path` to build `Self`.
fn create(path: &str) -> Result<Self, Error> {
let buf = read_to_string(path)
.map_err(|e| Error::Read { error: format!("Reading {} failed with: {}", path, e) })?;
Self::with_buf(&buf)
}
// Parses string in `buf` to build `Self`.
fn with_buf(buf: &str) -> Result<Self, Error> {
let regex = Self::get_regex()?;
let mut included_devices: HashMap<DeviceNumber, PathBuf> = HashMap::new();
let mut excluded_devices = HashSet::new();
let excluded_filesystem_types: HashSet<String> =
EXCLUDED_FILESYSTEM_TYPES.iter().map(|s| String::from(*s)).collect();
for line in buf.lines() {
if let Some(state) = Self::parse_line(&regex, &excluded_filesystem_types, line)? {
match state {
DeviceState::Include((device, path)) => {
included_devices.insert(device, path);
}
DeviceState::Exclude(device) => {
excluded_devices.insert(device);
}
}
}
}
Ok(Self { included_devices, excluded_devices })
}
fn parse_line(
re: &Regex,
excluded_filesystem_types: &HashSet<String>,
line: &str,
) -> Result<Option<DeviceState>, Error> {
let caps = match re.captures(line) {
Some(caps) => caps,
None => {
return Ok(None);
}
};
if &caps["relative_path"] != "/" {
return Ok(None);
}
let mount_point = &caps["mount_point"];
let mnt_pnt_with_slash = format!("{}/", mount_point);
let device_number = build_device_number(&caps["major"], &caps["minor"])?;
let fs_type = &caps["fs_type"];
if excluded_filesystem_types.contains(fs_type) {
info!(
"excluding fs type: {} for {} mount-point {} slash {}",
fs_type, line, mount_point, mnt_pnt_with_slash
);
return Ok(Some(DeviceState::Exclude(device_number)));
}
for excluded in EXCLUDE_PATHS {
if mnt_pnt_with_slash.starts_with(excluded) {
info!(
"exclude-paths fs type: {} for {} mount-point {} slash {}",
fs_type, line, mount_point, mnt_pnt_with_slash
);
return Ok(Some(DeviceState::Exclude(device_number)));
}
}
Ok(Some(DeviceState::Include((device_number, PathBuf::from(mount_point)))))
}
fn get_regex() -> Result<Regex, Error> {
Regex::new(concat!(
r"^\s*(?P<id_unknown1>\S+)",
r"\s+(?P<id_unknown2>\S+)",
r"\s+(?P<major>[0-9]+):(?P<minor>[0-9]+)",
r"\s+(?P<relative_path>\S+)",
r"\s+(?P<mount_point>\S+)",
r"\s+(?P<mount_opt>\S+)",
r"\s+(?P<shared>\S+)",
r"\s+\S+",
r"\s+(?P<fs_type>\S+)",
r"\s+(?P<device_path>\S+)"
))
.map_err(|e| Error::Custom {
error: format!("create regex for parsing mountinfo failed with: {}", e),
})
}
fn is_excluded(&self, device: &DeviceNumber) -> bool {
self.excluded_devices.contains(device)
}
fn get_included(&self) -> Iter<DeviceNumber, PathBuf> {
self.included_devices.iter()
}
}
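// An illustrative mountinfo line that `MountInfo::parse_line` above would
// classify (all field values are hypothetical):
//
//   26 20 254:6 / /data rw,nosuid shared:10 - ext4 /dev/block/dm-6 rw
//
// Here major:minor is 254:6, the relative path is "/", the mount point is
// "/data" and the fs type is "ext4", so the device would be included.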
#[derive(Default, PartialEq, Debug, Eq, Hash)]
struct TraceLineInfo {
device: DeviceNumber,
inode: InodeNumber,
offset: u64,
timestamp: u64,
}
impl TraceLineInfo {
pub fn from_trace_line(re: &Regex, line: &str) -> Result<Option<Self>, Error> {
let caps = match re.captures(line) {
Some(caps) => caps,
None => return Ok(None),
};
let major = &caps["major"];
let minor = &caps["minor"];
let ino = &caps["ino"];
let offset = &caps["offset"];
let timestamp = build_timestamp(&caps["seconds"], &caps["microseconds"])?;
Ok(Some(TraceLineInfo {
device: build_device_number(major, minor)?,
inode: u64::from_str_radix(ino, 16).map_err(|e| Error::Custom {
error: format!("failed parsing inode: {} : {}", ino, e),
})?,
offset: offset.parse::<u64>().map_err(|e| Error::Custom {
error: format!("failed parsing offset: {} : {}", offset, e),
})?,
timestamp,
}))
}
#[cfg(test)]
pub fn from_fields(
major: MajorMinorType,
minor: MajorMinorType,
inode: u64,
offset: u64,
timestamp: u64,
) -> Self {
Self { device: makedev(major, minor), inode, offset, timestamp }
}
// Convenience function to create regex. Used once per life of `record` but multiple times in
// case of tests.
pub fn get_trace_line_regex() -> Result<Regex, Error> {
// TODO: Fix this Regex expression for 5.15 kernels. This expression
// works only on 6.1+. Prior to 6.1, "<page>" was present in the output.
Regex::new(concat!(
r"^\s+(?P<cmd_pid>\S+)",
r"\s+(?P<cpu>\S+)",
r"\s+(?P<irq_stuff>\S+)",
r"\s+(?P<seconds>[0-9]+)\.(?P<microseconds>[0-9]+):",
r"\s+mm_filemap_add_to_page_cache:",
r"\s+dev\s+(?P<major>[0-9]+):(?P<minor>[0-9]+)",
r"\s+ino\s+(?P<ino>\S+)",
//r"\s+(?P<page>\S+)",
r"\s+(?P<pfn>\S+)",
r"\s+ofs=(?P<offset>[0-9]+)"
))
.map_err(|e| Error::Custom {
error: format!("create regex for tracing failed with: {}", e),
})
}
}
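// An illustrative 6.1+ trace line that the regex above would match (all field
// values are hypothetical; note the kernel's leading whitespace):
//
//      some_cmd-123 [001] .... 484.360311: mm_filemap_add_to_page_cache: dev 254:6 ino cf1 pfn=59833 ofs=32768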
#[derive(Debug, Serialize, Deserialize)]
struct MissingFile {
major_no: MajorMinorType,
minor_no: MajorMinorType,
inode: InodeNumber,
records: Vec<Record>,
}
#[derive(Debug, Default, Deserialize, Serialize)]
struct DebugInfo {
// Check all inodes for which paths don't exists. These are the files which
// * got deleted before we got to them
// * are filesystem internal files that fs access only via inode numbers.
missing_files: HashMap<FileId, MissingFile>,
// Number of bytes read that belongs to directory type inodes.
directory_read_bytes: u64,
// Number of bytes read from files for which we could not find a path in
// the filesystems.
missing_path_bytes: u64,
// Paths for which the current process doesn't have read permission.
privileged_paths: Vec<PathBuf>,
}
#[derive(Debug, Serialize)]
pub(crate) struct MemTraceSubsystem {
device_inode_map: HashMap<DeviceNumber, HashMap<InodeNumber, FileId>>,
// Count of all InodeNumber held by `device_inode_map`. This is handy to assign unique
// FileId.
inode_count: u64,
// `Record`s built from parsing read trace lines.
records: Vec<Record>,
// Regex to parse lines from trace_pipe.
#[serde(skip_serializing)]
regex: Regex,
// Mounted devices/filesystems either at the time of parsing trace file or at the time
// of building RecordsFile from parsed lines.
mount_info: MountInfo,
// A copy of TracerConfigs
tracer_configs: Option<TracerConfigs>,
// system page size stored to avoid frequent syscall to get the page size.
page_size: u64,
// The fields of the debug_info are populated when build_records_file is called (after lines
// are parsed from the trace file/pipe).
debug_info: DebugInfo,
}
impl MemTraceSubsystem {
pub fn update_configs(configs: &mut TracerConfigs) {
for path in EXCLUDE_PATHS {
configs.excluded_paths.push(path.to_owned().to_string());
}
for event in TRACE_EVENTS {
configs.trace_events.push(event.to_owned().to_string());
}
configs.mountinfo_path = Some(MOUNTINFO_PATH.to_string());
}
pub fn create_with_configs(tracer_configs: TracerConfigs) -> Result<Self, Error> {
static INITIAL_RECORDS_CAPACITY: usize = 100_000;
debug!("TracerConfig: {:#?}", tracer_configs);
let regex = TraceLineInfo::get_trace_line_regex()?;
let mount_info = MountInfo::create(tracer_configs.mountinfo_path.as_ref().unwrap())?;
debug!("mountinfo: {:#?}", mount_info);
Ok(Self {
device_inode_map: HashMap::new(),
inode_count: 0,
            // For one Android product, we see around 50k records. To avoid a lot of
            // allocations and copying of records, we create a vec of this size upfront.
            //
            // We do this to reduce the chances, however unlikely, of losing data coming
            // over `trace_pipe`.
            //
            // Note: Once we are done reading trace lines, we are less pedantic about
            // allocations and mem copies.
records: Vec::with_capacity(INITIAL_RECORDS_CAPACITY),
regex,
mount_info,
tracer_configs: Some(tracer_configs),
page_size: page_size()? as u64,
debug_info: DebugInfo {
missing_files: HashMap::new(),
directory_read_bytes: 0,
missing_path_bytes: 0,
privileged_paths: vec![],
},
})
}
fn new_file_id(&mut self) -> FileId {
let id = self.inode_count;
self.inode_count += 1;
FileId(id)
}
fn get_trace_info(&self, line: &str) -> Result<Option<TraceLineInfo>, Error> {
TraceLineInfo::from_trace_line(&self.regex, line)
}
    // Returns true if the file or directory is on a device which is excluded from walking.
    // If the path was excluded because the current process doesn't have the privileges to
    // read it, the path gets added to the `privileged` list.
fn is_excluded(&self, entry: &DirEntry, device: u64, privileged: &mut Vec<PathBuf>) -> bool {
        // We skip paths that reside on excluded devices here. This is ok because a
        // non-excluded mount point will have a separate entry in MountInfo. For example
        // - `/` has ext4
        // - `/tmp` has tmpfs
        // - `/tmp/mnt` has ext4 that we are interested in.
        // MountInfo will have three entries - `/`, `/tmp` and `/tmp/mnt`. Skipping walking
        // `/tmp` while walking `/` is ok, as a later `mount_info.get_included()` entry will
        // yield the `/tmp/mnt` path.
        //
        // We skip symlinks here as they can refer to mount points across
        // filesystems. If the target path is valid and accessible, then
        // we will have an entry for it by the file's <device, inode> pair.
        //
        // We skip devices that don't match the current walking device because we
        // eventually walk the other devices as well.
match symlink_metadata(entry.path()) {
Ok(lstat) => {
if self.mount_info.is_excluded(&lstat.dev())
|| lstat.dev() != device
|| lstat.file_type().is_symlink()
{
return true;
}
}
Err(e) => {
error!("stat on {} failed with {}", entry.path().to_str().unwrap(), e);
                // We treat EACCES specially because on some platforms, like Android, a process
                // needs a very special set of permissions to access some inodes.
                // We ignore errors in such cases *after* making an effort to get to them.
if e.kind() == ErrorKind::PermissionDenied
&& is_highly_privileged_path(entry.path())
{
privileged.push(entry.path().to_owned());
return true;
}
}
}
        // On error, we return false because if lstat has failed here, the following
        // operations, including stat, will also fail and be handled there.
}
}
impl TraceSubsystem for MemTraceSubsystem {
fn add_line(&mut self, line: &str) -> Result<(), Error> {
if let Some(info) = self.get_trace_info(line)? {
if self.mount_info.is_excluded(&info.device) {
return Ok(());
}
self.device_inode_map.entry(info.device).or_default();
let file_id = if let Some(id) =
self.device_inode_map.get_mut(&info.device).unwrap().get(&info.inode)
{
id.clone()
} else {
self.new_file_id()
};
self.device_inode_map
.get_mut(&info.device)
.unwrap()
.insert(info.inode, file_id.clone());
self.records.push(Record {
file_id,
offset: info.offset,
length: self.page_size,
timestamp: info.timestamp,
});
}
Ok(())
}
fn build_records_file(&mut self) -> Result<RecordsFile, Error> {
// reset debug_info in case build_records_file was called twice.
self.debug_info = DebugInfo::default();
let mut rf = RecordsFile::default();
let mut directories = HashSet::new();
        // TODO(b/302194377): We are holding all privileged_paths in this variable and then
        // transferring it to `self.debug_info.privileged_paths` later. We could avoid this
        // step by directly updating `self.debug_info.privileged_paths`. To do so, we need to
        // refactor the code so that the borrow checker does not complain in several places -
        // e.g. immutably borrowing `self.mount_info` in the outer loop and then mutably
        // borrowing `self.debug_info.privileged_paths`.
let mut privileged_paths = vec![];
        // Reload mount_info. When we created mount_info for the first time, the system may
        // have been in an early boot phase. Reload the mount_info so as to pick up the
        // current/new mount points.
if let Some(tracer_config) = &self.tracer_configs {
self.mount_info = MountInfo::create(tracer_config.mountinfo_path.as_ref().unwrap())?;
debug!("reloaded mountinfo: {:#?}", self.mount_info);
}
for (device, root_path) in self.mount_info.get_included() {
let inode_map = if let Some(map) = self.device_inode_map.get(device) {
map
} else {
continue;
};
if inode_map.is_empty() {
return Err(Error::Custom {
error: format!("Unexpected empty records for {:?}", root_path),
});
}
let mut block_size = 0;
let walker = WalkDir::new(root_path).into_iter();
for entry in
walker.filter_entry(|e| !self.is_excluded(e, *device, &mut privileged_paths))
{
let path = match entry {
Ok(entry) => entry.path().to_owned(),
Err(e) => {
error!("walking directory failed: {} {}", root_path.to_str().unwrap(), e);
continue;
}
};
let stat = match path.metadata() {
Ok(stat) => stat,
Err(e) => {
error!("stat on {} failed with {}", path.to_str().unwrap(), e);
continue;
}
};
block_size = stat.blksize();
let file_id = if let Some(id) = inode_map.get(&stat.ino()) {
id.clone()
} else {
continue;
};
// We cannot issue a normal readahead on directories. So we skip those records that
// belong to directories.
if stat.file_type().is_dir() {
info!(
"skipping directory readahead record for file_id:{file_id} ino:{} path:{} ",
stat.ino(),
path.to_str().unwrap()
);
directories.insert(file_id.clone());
continue;
}
rf.insert_or_update_inode(file_id, &stat, path.to_str().unwrap().to_owned());
}
rf.inner.filesystems.insert(*device, FsInfo { block_size });
}
self.debug_info.privileged_paths.append(&mut privileged_paths);
for (device, inode_map) in &self.device_inode_map {
for (inode, file_id) in inode_map {
if !rf.inner.inode_map.contains_key(file_id) {
let major_no: MajorMinorType = major(*device);
let minor_no: MajorMinorType = minor(*device);
self.debug_info.missing_files.insert(
file_id.clone(),
MissingFile { major_no, minor_no, inode: *inode, records: vec![] },
);
}
}
}
// Remove all records that belong to directories or for which we did not find paths.
let mut records = vec![];
for record in take(&mut self.records) {
if directories.contains(&record.file_id) {
self.debug_info.directory_read_bytes += record.length;
} else if let Some(missing_file) =
self.debug_info.missing_files.get_mut(&record.file_id)
{
self.debug_info.missing_path_bytes += record.length;
missing_file.records.push(record);
} else {
records.push(record);
}
}
warn!(
"Recorded {} bytes worth of data read from directories",
self.debug_info.directory_read_bytes
);
warn!(
"Recorded {} bytes worth of data read from files that don't have paths",
self.debug_info.missing_path_bytes
);
rf.inner.records = coalesce_records(records, true);
Ok(rf)
}
fn serialize(&self, write: &mut dyn Write) -> Result<(), Error> {
write
.write_all(
&serde_json::to_vec(&self)
.map_err(|e| Error::Serialize { error: e.to_string() })?,
)
.map_err(|source| Error::Write { path: "intermediate file".to_owned(), source })
}
}
#[cfg(test)]
mod tests {
use nix::sys::stat::{major, minor};
use std::assert_eq;
use std::path::Path;
use crate::tracer::tests::{copy_uncached_files_and_record_from, setup_test_dir};
use crate::replay::tests::generate_cached_files_and_record;
use super::*;
static TRACE_BUFFER: &str = r#"
Settingide-502 [001] .... 484.360292: mm_filemap_add_to_page_CACHE: dev 254:6 ino cf1 page=68d477 pfn=59833 ofs=32768
Settingide-502 [001] .... 484.360311: mm_filemap_add_to_page_cache: dev 254:6 ino cf1 page=759458 pfn=59827 ofs=57344
BOX_ENTDED-3071 [001] .... 485.276715: mm_filemap_add_to_pag_ecache: dev 254:6 ino 1 page=00cc1c pfn=81748 ofs=13574144
BOX_ENTDED-3071 [001] .... 485.276990: mm_filemap_add_to_page_cache: dev 254:6 ino cf2 page=36540b pfn=60952 ofs=0
.gms.peent-843 [001] .... 485.545516: mm_filemap_add_to_page_cache: dev 254:6 ino 1 page=002e8b pfn=58928 ofs=13578240
.gms.peent-843 [001] .... 485.545820: mm_filemap_add_to_page_cache: dev 254:6 ino cf3 page=6233ce pfn=58108 ofs=0
an.bg-459 [001] .... 494.029396: mm_filemap_add_to_page_cache: dev 254:3 ino 7cf page=c5b5c7 pfn=373933 ofs=1310720
an.bg-459 [001] .... 494.029398: mm_filemap_add_to_page_cache: dev 254:3 ino 7cf page=b8b9ec pfn=410074 ofs=1314816
"#;
fn sample_mem_traces() -> (String, Vec<Option<TraceLineInfo>>) {
(
TRACE_BUFFER.to_owned(),
vec![
None,
None,
Some(TraceLineInfo::from_fields(254, 6, 0xcf1, 57344, 484360311000)),
None,
Some(TraceLineInfo::from_fields(254, 6, 0xcf2, 0, 485276990000)),
Some(TraceLineInfo::from_fields(254, 6, 0x1, 13578240, 485545516000)),
Some(TraceLineInfo::from_fields(254, 6, 0xcf3, 0, 485545820000)),
Some(TraceLineInfo::from_fields(254, 3, 0x7cf, 1310720, 494029396000)),
Some(TraceLineInfo::from_fields(254, 3, 0x7cf, 1314816, 494029398000)),
None,
],
)
}
#[test]
fn test_parse_trace_line() {
let (buf, res) = sample_mem_traces();
let re = TraceLineInfo::get_trace_line_regex().unwrap();
for (index, line) in buf.lines().enumerate() {
let found = TraceLineInfo::from_trace_line(&re, line).unwrap();
let expected = res.get(index).unwrap();
assert_eq!(found.is_some(), expected.is_some());
if found.is_some() {
assert_eq!(found.unwrap(), *expected.as_ref().unwrap());
}
}
}
#[test]
fn test_add_line() {
let test_base_dir = setup_test_dir();
let (rf, mut files) =
generate_cached_files_and_record(None, true, Some(page_size().unwrap() as u64));
let (_uncached_rf, uncached_files) =
copy_uncached_files_and_record_from(Path::new(&test_base_dir), &mut files, &rf);
let mut mount_include = HashMap::new();
let included_dev = uncached_files.get(0).unwrap().0.metadata().unwrap().dev();
let included_inode1 = uncached_files.get(0).unwrap().0.metadata().unwrap().ino();
let included_inode2 = uncached_files.get(1).unwrap().0.metadata().unwrap().ino();
let included_major = major(included_dev);
let included_minor = minor(included_dev);
mount_include.insert(included_dev, std::fs::canonicalize(test_base_dir).unwrap());
let mut mount_exclude = HashSet::new();
mount_exclude.insert(0);
let mut mem_tracer = MemTraceSubsystem {
device_inode_map: HashMap::new(),
inode_count: 0,
records: vec![],
regex: TraceLineInfo::get_trace_line_regex().unwrap(),
mount_info: MountInfo {
included_devices: mount_include,
excluded_devices: mount_exclude,
},
tracer_configs: None,
page_size: page_size().unwrap() as u64,
debug_info: DebugInfo {
missing_files: HashMap::new(),
directory_read_bytes: 0,
missing_path_bytes: 0,
privileged_paths: vec![],
},
};
let pg_size = page_size().unwrap();
// Format is major, minor, inode, offset
let inputs = vec![
(0, 0, 2, 10), // to be excluded. bad device.
(included_major, included_minor, included_inode1, 0),
(included_major, included_minor, included_inode1, 3 * pg_size),
// duplicate read
(included_major, included_minor, included_inode1, 3 * pg_size),
(0, 0, included_inode1, 10), // to be excluded. bad device.
(included_major, included_minor, included_inode1, 2 * pg_size), // contiguous
// non-contiguous
(included_major, included_minor, included_inode1, 12 * pg_size),
// same offset different inode
(included_major, included_minor, included_inode2, 3 * pg_size),
// Contiguous offset different inode
(included_major, included_minor, included_inode2, pg_size),
];
for (i, (major, minor, inode, offset)) in inputs.iter().enumerate() {
// used to timestamp the log line.
let seconds = i;
// used to timestamp the log line.
let microseconds = i;
for operation in &["mm_filemap_add_to_page_cache", "some_other_operation"] {
let line = format!(
" BOX_ENTRY_ADDED-3071 [001] .... {}.{}: {}: \
dev {}:{} ino {:x} page=00000000f936540b pfn=60952 ofs={}",
seconds, microseconds, operation, major, minor, inode, offset
);
mem_tracer.add_line(&line).unwrap();
}
}
assert_eq!(mem_tracer.records.len(), 7);
assert_eq!(mem_tracer.device_inode_map.len(), 1);
assert_eq!(mem_tracer.device_inode_map.get(&included_dev).unwrap().len(), 2);
assert!(mem_tracer
.device_inode_map
.get(&included_dev)
.unwrap()
.contains_key(&included_inode1));
assert!(mem_tracer
.device_inode_map
.get(&included_dev)
.unwrap()
.contains_key(&included_inode2));
}
fn new_record(file: u64, offset: u64, length: u64, timestamp: u64) -> Record {
Record { file_id: FileId(file), offset, length, timestamp }
}
#[test]
fn test_get_records_file() {
let test_base_dir = setup_test_dir();
let (rf, mut files) =
generate_cached_files_and_record(None, true, Some(page_size().unwrap() as u64));
let (_uncached_rf, uncached_files) =
copy_uncached_files_and_record_from(Path::new(&test_base_dir), &mut files, &rf);
let mut mount_include = HashMap::new();
let included_dev = uncached_files.get(0).unwrap().0.metadata().unwrap().dev();
let included_inode1 = uncached_files.get(0).unwrap().0.metadata().unwrap().ino();
let included_inode2 = uncached_files.get(1).unwrap().0.metadata().unwrap().ino();
let included_major = major(included_dev);
let included_minor = minor(included_dev);
mount_include.insert(included_dev, std::fs::canonicalize(test_base_dir).unwrap());
let mut mount_exclude = HashSet::new();
mount_exclude.insert(0);
let mut mem_tracer = MemTraceSubsystem {
device_inode_map: HashMap::new(),
inode_count: 0,
records: vec![],
regex: TraceLineInfo::get_trace_line_regex().unwrap(),
mount_info: MountInfo {
included_devices: mount_include,
excluded_devices: mount_exclude,
},
tracer_configs: None,
page_size: page_size().unwrap() as u64,
debug_info: DebugInfo {
missing_files: HashMap::new(),
directory_read_bytes: 0,
missing_path_bytes: 0,
privileged_paths: vec![],
},
};
let pg_size = page_size().unwrap() as u64;
// Format is major, minor, inode, offset
let inputs = vec![
(0, 0, 2, 10), // to be excluded. bad device.
(included_major, included_minor, included_inode1, 0),
(included_major, included_minor, included_inode1, 3 * pg_size),
// duplicate read
(included_major, included_minor, included_inode1, 3 * pg_size),
(0, 0, included_inode1, 10), // to be excluded. bad device.
(included_major, included_minor, included_inode1, 2 * pg_size), // contiguous
// non-contiguous
(included_major, included_minor, included_inode1, 12 * pg_size),
// same offset different inode
(included_major, included_minor, included_inode2, 3 * pg_size),
// Contiguous offset different inode
(included_major, included_minor, included_inode2, pg_size),
];
for (i, (major, minor, inode, offset)) in inputs.iter().enumerate() {
// used to timestamp the log line.
let seconds = i;
// used to timestamp the log line.
let microseconds = i;
for operation in &["mm_filemap_add_to_page_cache", "some_other_operation"] {
let line = format!(
" BOX_ENTRY_ADDED-3071 [001] .... {}.{}: {}: \
dev {}:{} ino {:x} page=00000000f936540b pfn=60952 ofs={}",
seconds, microseconds, operation, major, minor, inode, offset
);
mem_tracer.add_line(&line).unwrap();
}
}
let rf = mem_tracer.build_records_file().unwrap();
assert_eq!(
rf.inner.records,
vec![
new_record(0, 0, pg_size, 1000001000),
new_record(0, 2 * pg_size, 2 * pg_size, 2000002000),
new_record(0, 12 * pg_size, pg_size, 6000006000),
new_record(1, pg_size, pg_size, 8000008000),
new_record(1, 3 * pg_size, pg_size, 7000007000),
]
);
}
}

View file

@ -0,0 +1,965 @@
// Copyright (C) 2024 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Tracer supports collecting information based on tracing subsystems
//! within `/sys/kernel/tracing`.
//!
//! ## Mem
//! Mem is the preferred tracer.
//! ### Phase 1:
//! This phase relies on a trace event at
//! "events/filemap/mm_filemap_add_to_page_cache". When enabled, the event logs
//! a message that contains the device id, inode number, and offset of the page that is
//! being read. The tracer makes a note of each such event.
//!
//! ### Phase 2:
//! When the recording of events is done, the tracer gets all mount points for which
//! device ids were recorded. Once it knows the mount points, it looks up file
//! paths for the inode numbers that it recorded. The paths, offsets and lengths
//! are then stored in the records file.
//!
//! Phase 2 is very IO intensive as the entire filesystem is walked to find paths
//! for different inodes.
//!
pub(crate) mod mem;
use std::{
boxed::Box,
collections::HashSet,
fs::{create_dir, read_to_string, rename, File, OpenOptions},
io::{BufRead, BufReader, Read, Write},
path::{Path, PathBuf},
string::ToString,
sync::mpsc::{self, Receiver, Sender},
};
use log::{error, info};
use nix::time::ClockId;
use serde::Deserialize;
use serde::Serialize;
use crate::error::Error;
use crate::{args::TracerType, format::RecordsFile};
use mem::MemTraceSubsystem;
pub(crate) static EXCLUDE_PATHS: &[&str] =
&["/dev/", "/proc/", "/sys/", "/tmp/", "/run/", "/config/", "/mnt/", "/storage/"];
/// During the record phase, prefetch may modify files under `/sys/kernel/tracing/` to
/// - change the trace buffer size so that we don't lose trace events
/// - enable a few trace events
/// - enable tracing
///
/// The old values are restored at the end of the record phase.
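///
/// A minimal sketch of the intended usage (the event path below is
/// illustrative; real paths are derived from `TracerConfigs`):
///
/// ```ignore
/// let event = TraceEventFile::enable(PathBuf::from(
///     "/sys/kernel/tracing/events/filemap/mm_filemap_add_to_page_cache/enable",
/// ))?;
/// // ... record trace events ...
/// drop(event); // Dropping the handle restores the file's previous contents.
/// ```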
#[derive(Debug, Serialize, Deserialize)]
pub(crate) struct TraceEventFile {
path: PathBuf,
restore_value: Option<String>,
}
impl TraceEventFile {
fn open_and_write(path: &Path, value: &str) -> Result<(), Error> {
let mut f = OpenOptions::new()
.write(true)
.read(true)
.open(path)
.map_err(|e| Error::Open { source: e, path: path.to_str().unwrap().to_string() })?;
f.write_all(value.as_bytes())
.map_err(|e| Error::Write { path: path.to_str().unwrap().to_owned(), source: e })
}
pub fn write(path: PathBuf, value: &str) -> Result<Self, Error> {
let restore_value = read_to_string(&path).map_err(|s| Error::Read {
error: format!("Reading {} failed:{}", path.to_str().unwrap(), s),
})?;
Self::open_and_write(&path, value)?;
info!(
"Changed contents of {} from {:?} to {}",
path.to_str().unwrap(),
restore_value,
value
);
Ok(Self { path, restore_value: Some(restore_value) })
}
pub fn enable(path: PathBuf) -> Result<Self, Error> {
Self::write(path, "1")
}
pub fn restore(&self) -> Result<(), Error> {
if let Some(restore_value) = &self.restore_value {
Self::open_and_write(&self.path, restore_value)
} else {
Ok(())
}
}
}
impl Drop for TraceEventFile {
fn drop(&mut self) {
if let Err(ret) = self.restore() {
error!(
"Failed to restore state of file {:?} with value: {:?}. Error: {}",
self.path,
self.restore_value,
ret.to_string()
);
}
}
}
#[derive(Debug, Deserialize, Serialize)]
pub(crate) struct TracerConfigs {
pub excluded_paths: Vec<String>,
pub buffer_size_file_path: String,
pub trace_base_path: PathBuf,
pub trace_events: Vec<String>,
pub mountinfo_path: Option<String>,
pub trace_operations: HashSet<String>,
// We never read back these fields. The only use for holding these around is to restore state at
// the end of run.
#[allow(dead_code)]
trace_files: Vec<TraceEventFile>,
}
impl TracerConfigs {
pub fn new(
kb_buffer_size: Option<u64>,
setup_tracing: bool,
tracer_type: TracerType,
trace_mount_point: Option<String>,
tracing_instance: Option<String>,
) -> Result<Self, Error> {
static TRACE_MOUNT_POINT: &str = "/sys/kernel/tracing";
// Trace buffer size file relative to trace mount point
static TRACE_BUFFER_SIZE_FILE: &str = "buffer_size_kb";
let trace_mount_point = trace_mount_point.unwrap_or_else(|| TRACE_MOUNT_POINT.to_owned());
let trace_base_path = if let Some(instance) = tracing_instance {
Path::new(&trace_mount_point).join("instances").join(instance)
} else {
Path::new(&trace_mount_point).to_owned()
};
if setup_tracing && !trace_base_path.exists() {
create_dir(&trace_base_path).map_err(|e| Error::Create {
source: e,
path: trace_base_path.to_str().unwrap().to_owned(),
})?;
}
if !trace_base_path.exists() {
return Err(Error::Custom {
error: format!(
"trace mount point doesn't exist: {}",
trace_base_path.to_str().unwrap().to_owned()
),
});
}
let mut configs = TracerConfigs {
excluded_paths: vec![],
buffer_size_file_path: TRACE_BUFFER_SIZE_FILE.to_owned(),
trace_base_path,
trace_events: vec![],
mountinfo_path: None,
trace_operations: HashSet::new(),
trace_files: vec![],
};
match tracer_type {
TracerType::Mem => MemTraceSubsystem::update_configs(&mut configs),
}
if setup_tracing {
let trace_base_dir = Path::new(&configs.trace_base_path);
if let Some(kb_buffer_size) = kb_buffer_size {
configs.trace_files.push(TraceEventFile::write(
trace_base_dir.join(&configs.buffer_size_file_path),
&kb_buffer_size.to_string(),
)?);
}
for path in &configs.trace_events {
configs.trace_files.push(TraceEventFile::enable(trace_base_dir.join(path))?);
}
}
Ok(configs)
}
}
/// Returns time, in nanoseconds, since boot
pub fn nanoseconds_since_boot() -> u64 {
if let Ok(t) = nix::time::clock_gettime(ClockId::CLOCK_MONOTONIC) {
        ((t.tv_sec() * 1_000_000_000) + t.tv_nsec()) as u64
} else {
0
}
}
pub(crate) trait TraceSubsystem {
/// This routine is called whenever there is a new line available to be parsed.
    /// The implementation will typically parse the line and retain the data in memory.
/// Implementors are not expected to do heavy lifting tasks, like IO, in this context.
fn add_line(&mut self, line: &str) -> Result<(), Error>;
/// Generates a records file from all the collected data.
    /// In this context, implementors might process the data by issuing queries to filesystems.
fn build_records_file(&mut self) -> Result<RecordsFile, Error>;
    /// This helps us serialize the internal state of the tracing subsystem during the record phase.
    /// This allows us to get raw data for analyzing read patterns and for debugging in situations
    /// where we might not yet have access to the system (e.g. the early boot phase).
fn serialize(&self, writer: &mut dyn Write) -> Result<(), Error>;
}
/// Returns page size in bytes
pub(crate) fn page_size() -> Result<usize, Error> {
Ok(nix::unistd::sysconf(nix::unistd::SysconfVar::PAGE_SIZE)
.map_err(|e| Error::Custom { error: format!("failed to query page size: {}", e) })?
.ok_or(Error::Custom { error: "failed to query page size: None returned".to_string() })?
as usize)
}
pub struct Tracer {
// Open handle to static trace buffer file which is usually located at
// `/sys/kernel/tracing/trace`.
// See comment on top of `trace` function.
trace_file: BufReader<File>,
// Open handle to trace pipe which is usually located at
// `/sys/kernel/tracing/trace_pipe`.
// See comment on top of `trace` function.
trace_pipe: BufReader<File>,
// Signal to exit the infinite loop in `trace()`
exit_rx: Receiver<()>,
// tracing subsystem that actually parses trace lines and builds records.
tracing_subsystem: Box<dyn TraceSubsystem + Send>,
}
impl Tracer {
pub fn create(
kb_buffer_size: Option<u64>,
tracer_type: TracerType,
tracing_instance: Option<String>,
setup_tracing: bool,
) -> Result<(Self, Sender<()>), Error> {
/// Trace pipe path relative to trace mount point
static TRACE_PIPE_PATH: &str = "trace_pipe";
/// Trace file path relative to trace mount point
static TRACE_FILE_PATH: &str = "trace";
let configs = TracerConfigs::new(
kb_buffer_size,
setup_tracing,
tracer_type.clone(),
None,
tracing_instance,
)?;
let pipe_path = Path::new(&configs.trace_base_path).join(TRACE_PIPE_PATH);
let trace_pipe = File::open(&pipe_path)
.map_err(|e| Error::Open { source: e, path: pipe_path.to_str().unwrap().to_owned() })?;
let file_path = Path::new(&configs.trace_base_path).join(TRACE_FILE_PATH);
let trace_file = File::open(&file_path)
.map_err(|e| Error::Open { source: e, path: file_path.to_str().unwrap().to_owned() })?;
let tracer: Box<dyn TraceSubsystem + Send> = match tracer_type {
TracerType::Mem => Box::new(MemTraceSubsystem::create_with_configs(configs)?),
};
Self::create_with_config(trace_file, trace_pipe, tracer)
}
fn create_with_config(
file: File,
pipe: File,
tracer: Box<dyn TraceSubsystem + Send>,
) -> Result<(Self, Sender<()>), Error> {
let (exit_tx, exit_rx) = mpsc::channel();
let trace_pipe = BufReader::new(pipe);
let trace_file = BufReader::new(file);
Ok((Self { trace_file, trace_pipe, exit_rx, tracing_subsystem: tracer }, exit_tx))
}
fn save_intermediate_state(&self, intermediate_file: Option<&PathBuf>) -> Result<(), Error> {
if let Some(int_path) = intermediate_file {
let mut tmp_file = int_path.clone();
tmp_file.set_extension("int.tmp");
            let mut out_file = File::create(&tmp_file).map_err(|source| Error::Create {
                source,
                path: tmp_file.to_str().unwrap().to_owned(),
            })?;
self.tracing_subsystem.serialize(&mut out_file)?;
rename(&tmp_file, int_path).map_err(|e| Error::Custom {
error: format!(
"rename file from{} to:{} failed with {}",
tmp_file.to_str().unwrap(),
int_path.to_str().unwrap(),
e
),
})?;
}
Ok(())
}
/// This routine parses all the events since last reset of trace buffer.
///
/// The linux tracing subsystem exposes two interfaces to get trace events from
/// 1. a file - usually at `/sys/kernel/tracing/trace`
/// 2. a pipe - usually at `/sys/kernel/tracing/trace_pipe`
///
    /// The file is a *sort of* ring buffer that works off of a `buffer_size_kb`-sized buffer.
    /// Relying on it alone is not very efficient, as we end up getting a lot of duplicates.
    ///
    /// The pipe only contains trace lines emitted after it is opened. Any trace events that
    /// occurred before the pipe was opened are lost to it.
    ///
    /// IMPORTANT: The moment we start reading from the pipe, the events in the file
    /// disappear/reset. So we should read the file entirely before we start reading the pipe.
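    ///
    /// A sketch of how a record phase might drive this end to end (argument
    /// values are illustrative; see `Tracer::create` above for the parameters):
    ///
    /// ```ignore
    /// let (mut tracer, exit_tx) = Tracer::create(Some(8192), TracerType::Mem, None, true)?;
    /// let handle = std::thread::spawn(move || tracer.trace(None));
    /// // ... let the workload of interest run ...
    /// exit_tx.send(()).unwrap();
    /// // The loop exits once one more line arrives on the pipe after the send.
    /// let records_file = handle.join().unwrap()?;
    /// ```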
pub fn trace(&mut self, intermediate_file: Option<&PathBuf>) -> Result<RecordsFile, Error> {
let mut buf = String::new();
self.trace_file
.read_to_string(&mut buf)
.map_err(|e| Error::Read { error: format!("failed to read trace file: {}", e) })?;
for line in buf.lines() {
let trimmed = line.trim_end();
self.tracing_subsystem.add_line(trimmed)?;
}
        // The logic here is to block on trace_pipe forever. We break out of the loop only when we
        // have read a line from the pipe *and* have received an event on exit_rx.
        // This works because the system will keep issuing read syscalls, and because, at the
        // moment, we use prefetch on build systems rather than in production to generate the
        // records file.
//
// TODO(b/302045304): async read trace_pipe.
while self.exit_rx.try_recv().is_err() {
let mut line = String::new();
let len = self
.trace_pipe
.read_line(&mut line)
.map_err(|e| Error::Read { error: e.to_string() })?;
let trimmed = line.trim_end();
if len == 0 {
                // We should never read a zero-length line or reach EOF on the pipe.
return Err(Error::Read {
error: "read zero length line from trace_pipe".to_string(),
});
}
self.tracing_subsystem.add_line(trimmed)?;
}
        // We are here because the above loop exited normally. The traced lines are stored in
        // `Self`. Build the `RecordsFile` by processing the data read above.
self.save_intermediate_state(intermediate_file)?;
let rf = self.tracing_subsystem.build_records_file()?;
self.save_intermediate_state(intermediate_file)?;
Ok(rf)
}
}
#[cfg(test)]
pub(crate) mod tests {
use crate::RecordsFile;
use std::alloc::Layout;
use std::borrow::ToOwned;
use std::convert::TryInto;
use std::fs::{create_dir_all, OpenOptions};
use std::io::Read;
use std::io::Seek;
use std::io::Write;
use std::ops::Range;
use std::os::linux::fs::MetadataExt;
use std::os::unix::fs::symlink;
use std::os::unix::prelude::OpenOptionsExt;
use std::path::Path;
use std::thread;
use std::time::Duration;
use std::{assert_eq, env};
use libc::O_DIRECT;
use nix::sys::stat::{major, minor};
use nix::unistd::pipe;
use rand::distributions::Alphanumeric;
use rand::Rng;
use tempfile::NamedTempFile;
use super::*;
use crate::replay::tests::generate_cached_files_and_record;
use std::ops::{Deref, DerefMut};
#[test]
fn trace_event_file_enable_and_restore() {
let mut file = NamedTempFile::new().unwrap();
let _ = file.write("0".as_bytes()).unwrap();
{
let _e = TraceEventFile::enable(file.path().to_owned()).unwrap();
assert_eq!(read_to_string(file.path()).unwrap(), "1");
}
assert_eq!(read_to_string(file.path()).unwrap(), "0");
}
#[test]
fn trace_event_file_write_and_restore() {
let mut file = NamedTempFile::new().unwrap();
let _ = file.write("hello".as_bytes()).unwrap();
{
let _e = TraceEventFile::write(file.path().to_owned(), "world").unwrap();
assert_eq!(read_to_string(file.path()).unwrap(), "world");
}
assert_eq!(read_to_string(file.path()).unwrap(), "hello");
}
fn setup_trace_mount_point(
create_mount_point: bool,
create_instances: bool,
instance_name: Option<String>,
) -> PathBuf {
assert!(
create_mount_point || !create_instances,
"cannot create instances without creating mount point"
);
let mount_point = env::temp_dir().join(
rand::thread_rng()
.sample_iter(&Alphanumeric)
.take(10)
.map(char::from)
.collect::<String>(),
);
let mut base_path = Path::new(&mount_point).to_owned();
if create_mount_point {
create_dir(&mount_point).unwrap();
}
if create_instances {
base_path = base_path.join("instances");
if let Some(instance_name) = &instance_name {
base_path = base_path.join(instance_name)
}
create_dir_all(&base_path).unwrap();
}
if create_mount_point || create_instances {
std::fs::write(&base_path.join("buffer_size_kb"), "100").unwrap();
std::fs::write(&base_path.join("tracing_on"), "0").unwrap();
std::fs::write(&base_path.join("trace"), "0").unwrap();
std::fs::write(&base_path.join("trace_pipe"), "0").unwrap();
for event in [
"events/fs/do_sys_open",
"events/fs/open_exec",
"events/fs/uselib",
"events/filemap/mm_filemap_add_to_page_cache",
] {
let event_path = base_path.join(event);
std::fs::create_dir_all(&event_path).unwrap();
std::fs::write(&event_path.join("enable"), "0").unwrap();
}
}
mount_point
}
#[test]
fn test_configs_no_setup() {
let mount_point = setup_trace_mount_point(true, true, None);
let _configs = TracerConfigs::new(
Some(10),
false,
TracerType::Mem,
Some(mount_point.to_str().unwrap().to_owned()),
None,
)
.unwrap();
}
#[test]
fn test_configs_no_setup_no_mount_point() {
let mount_point = setup_trace_mount_point(false, false, None);
assert_eq!(
TracerConfigs::new(
Some(10),
false,
TracerType::Mem,
Some(mount_point.to_str().unwrap().to_owned()),
None,
)
.unwrap_err()
.to_string(),
format!(
"Failed to setup prefetch: trace mount point doesn't exist: {}",
mount_point.to_str().unwrap()
)
);
}
#[test]
fn test_configs_no_setup_no_instances() {
let mount_point = setup_trace_mount_point(true, false, None);
assert_eq!(
TracerConfigs::new(
Some(10),
false,
TracerType::Mem,
Some(mount_point.to_str().unwrap().to_owned()),
Some("my_instance".to_owned()),
)
.unwrap_err()
.to_string(),
format!(
"Failed to setup prefetch: trace mount point doesn't exist: {}/instances/my_instance",
mount_point.to_str().unwrap()
)
);
}
#[test]
fn test_configs_setup_without_instances() {
let mount_point = setup_trace_mount_point(true, false, None);
assert!(TracerConfigs::new(
Some(10),
true,
TracerType::Mem,
Some(mount_point.to_str().unwrap().to_owned()),
None
)
.is_ok());
}
#[test]
fn test_configs_setup_with_instances() {
let mount_point = setup_trace_mount_point(true, true, Some("my_instance".to_owned()));
assert!(TracerConfigs::new(
Some(10),
true,
TracerType::Mem,
Some(mount_point.to_str().unwrap().to_owned()),
Some("my_instance".to_owned())
)
.is_ok())
}
pub(crate) fn setup_test_dir() -> PathBuf {
let test_base_dir: String = rand::thread_rng()
.sample_iter(&rand::distributions::Alphanumeric)
.take(7)
.map(char::from)
.collect();
let test_base_dir = format!(
"{}/test/{}",
std::fs::read_link("/proc/self/exe").unwrap().parent().unwrap().to_str().unwrap(),
test_base_dir
);
std::fs::create_dir_all(&test_base_dir).unwrap();
PathBuf::from(test_base_dir)
}
fn modify_records_file(rf: &RecordsFile, target: &str) -> RecordsFile {
let mut modified_rf = rf.clone();
for inode in modified_rf.inner.inode_map.values_mut() {
let new_paths: Vec<String> = inode
.paths
.iter()
.map(|s| {
let parent = Path::new(s).parent().unwrap().to_str().unwrap();
s.replace(parent, target)
})
.collect();
inode.paths = new_paths;
}
modified_rf
}
struct AlignedBuffer {
ptr: *mut u8,
len: usize,
layout: Layout,
}
impl AlignedBuffer {
fn new(size: usize, alignment: usize) -> Result<Self, Error> {
if size == 0 {
return Err(Error::Custom { error: "cannot allocate zero bytes".to_string() });
}
let layout = Layout::from_size_align(size, alignment).unwrap();
// SAFETY:
// - `size` is a valid non-zero positive integer representing the desired buffer size.
// - The layout is checked for validity using `.unwrap()`.
let ptr = unsafe { std::alloc::alloc(layout) };
if ptr.is_null() {
return Err(Error::Custom { error: format!("alloc failed: size: {}", size) });
}
Ok(AlignedBuffer { ptr, len: size, layout })
}
}
impl Deref for AlignedBuffer {
type Target = [u8];
// SAFETY:
// - self.ptr is a valid pointer obtained from a successful allocation in the new() method.
// - self.len is a valid length used for allocation in the new() method.
fn deref(&self) -> &Self::Target {
unsafe { std::slice::from_raw_parts(self.ptr, self.len) }
}
}
impl DerefMut for AlignedBuffer {
// SAFETY:
// - self.ptr is a valid pointer obtained from a successful allocation in the new() method.
// - self.len is a valid length used for allocation in the new() method.
fn deref_mut(&mut self) -> &mut Self::Target {
unsafe { std::slice::from_raw_parts_mut(self.ptr, self.len) }
}
}
impl Drop for AlignedBuffer {
fn drop(&mut self) {
// SAFETY:
// - self.ptr is a valid pointer obtained from a successful allocation in the new() method.
// - self.layout is the Layout used to allocate the memory.
unsafe {
std::alloc::dealloc(self.ptr, self.layout);
}
}
}
    // Copies `files` into the directory pointed to by `base`.
    //
    // The newly created files' data is potentially uncached - i.e. the new
    // files are opened with O_DIRECT.
    //
    // WARNING: Though this function makes an attempt to copy into uncached
    // files, it cannot guarantee that, as other processes in the system may
    // access the files. This may lead to flaky tests or unexpected results.
pub(crate) fn copy_uncached_files_and_record_from(
base: &Path,
files: &mut [(NamedTempFile, Vec<Range<u64>>)],
rf: &RecordsFile,
) -> (RecordsFile, Vec<(PathBuf, Vec<Range<u64>>)>) {
let mut new_files = vec![];
for (in_file, ranges) in files {
let out_path = base.join(in_file.path().file_name().unwrap());
let mut out_file = OpenOptions::new()
.read(true)
.write(true)
.custom_flags(O_DIRECT)
.create_new(true)
.open(&out_path)
.expect("Can't open");
let page_size = page_size().unwrap() as u64;
let in_file_size = in_file.metadata().unwrap().len();
assert_eq!(
in_file_size % page_size,
0,
"we create files that are aligned to page size"
);
let out_file_size = in_file_size;
let mut buf =
AlignedBuffer::new(out_file_size.try_into().unwrap(), page_size as usize).unwrap();
let _ = in_file.read(&mut *buf).unwrap();
out_file.write_all(&*buf).unwrap();
new_files.push((out_path, ranges.clone()));
}
for inode in rf.inner.inode_map.values() {
for path in &inode.paths {
let in_path = Path::new(&path);
let out_path = base.join(in_path.file_name().unwrap());
if !out_path.exists() {
let orig_file =
out_path.file_name().unwrap().to_str().unwrap().replace("-symlink", "");
symlink(orig_file, out_path.to_str().unwrap()).unwrap();
new_files.push((out_path.to_owned(), vec![]));
}
}
}
let modified_rf = modify_records_file(rf, base.to_str().unwrap());
(modified_rf, new_files)
}
    // Generates mem trace lines from the given args, also injecting some lines
    // that are of no importance.
fn mem_generate_trace_line_for_open(path: &Path, time: u16, _op: Option<&str>) -> Vec<String> {
let op = "mm_filemap_add_to_page_cache";
let stat = path.metadata().unwrap();
let major_no = major(stat.st_dev());
let minor_no = minor(stat.st_dev());
let inode_number = stat.st_ino();
vec![
// unknown operation
format!(
" SettingsProvide-502 [001] .... {}.{}: {}: dev {}:{} ino {:x} \
page=000000008b759458 pfn=59827 ofs=0",
time,
(time * 100) + time,
"unknown_operation",
major_no,
minor_no,
inode_number,
),
// invalid/relative inode
format!(
" SettingsProvide-502 [001] .... {}.{}: {}: dev {}:{} ino {:x} \
page=000000008b759458 pfn=59827 ofs=0",
time,
(time * 100) + time,
"unknown_operation",
major_no,
minor_no,
inode_number + 100,
),
// good one
format!(
" BOX_ENTRY_ADDED-3071 [001] .... {}.{}: {}: dev {}:{} ino {:x} \
page=00000000f936540b pfn=60952 ofs={}",
time,
(time * 100) + time,
op,
major_no,
minor_no,
inode_number,
0
),
// good one
format!(
" BOX_ENTRY_ADDED-3071 [001] .... {}.{}: {}: dev {}:{} ino {:x} \
page=00000000f936540b pfn=60952 ofs={}",
time,
(time * 100) + time,
op,
major_no,
minor_no,
inode_number,
10_000,
),
// good one
format!(
" BOX_ENTRY_ADDED-3071 [001] .... {}.{}: {}: dev {}:{} ino {:x} \
page=00000000f936540b pfn=60952 ofs={}",
time,
(time * 100) + time,
op,
major_no,
minor_no,
inode_number,
100_000,
),
// good one
format!(
" BOX_ENTRY_ADDED-3071 [001] .... {}.{}: {}: dev {}:{} ino {:x} \
page=00000000f936540b pfn=60952 ofs={}",
time,
(time * 100) + time,
op,
major_no,
minor_no,
inode_number,
1_000_000,
),
// invalid operation case
format!(
" SettingsProvide-502 [001] .... {}.{}: {}: dev {}:{} ino {:x} \
page=000000008b759458 pfn=59827 ofs=0",
time,
(time * 100) + time,
op.to_uppercase(),
major_no,
minor_no,
inode_number,
),
]
}
fn generate_trace_line_for_open(
tracing_type: TracerType,
path: &Path,
time: u16,
op: Option<&str>,
) -> Vec<String> {
match tracing_type {
TracerType::Mem => mem_generate_trace_line_for_open(path, time, op),
}
}
    // Generates a fake mountinfo file with a bunch of fake mount points and
    // registers the given path as a mount point.
fn create_fake_mountinfo_for(path: &Path) -> NamedTempFile {
let stat = path.metadata().unwrap();
let major_no = major(stat.st_dev());
let minor_no = minor(stat.st_dev());
let mut mountinfo_path = NamedTempFile::new().unwrap();
mountinfo_path
.write_all(
"16 15 0:17 / /dev/pts rw,relatime shared:3 - devpts devpts \
rw,seclabel,mode=600,ptmxmode=000\n"
.as_bytes(),
)
.unwrap();
mountinfo_path
.write_all(
"17 26 0:18 / /proc rw,relatime shared:4 - proc proc rw,gid=3009,hidepid=\
invisible\n"
.as_bytes(),
)
.unwrap();
mountinfo_path
.write_all(
format!(
"26 24 {}:{} / {} ro,nodev,noatime shared:1 - ext4 /dev/block/dm-3 ro,\
seclabel,errors=panic\n",
major_no,
minor_no,
path.to_str().unwrap(),
)
.as_bytes(),
)
.unwrap();
mountinfo_path
}
static RECORD_PER_FILE: usize = 4;
fn create_tracer(
base_dir: &Path,
t: TracerType,
) -> (Box<dyn TraceSubsystem + Send>, Vec<NamedTempFile>) {
let kb_buffer_size = Some(8388608);
let trace_mount_point = setup_test_dir();
let mut buffer_size_file = NamedTempFile::new_in(&trace_mount_point).unwrap();
buffer_size_file
.write_all(format!("{}", kb_buffer_size.as_ref().unwrap()).as_bytes())
.unwrap();
let buffer_size_file_path = buffer_size_file.path().to_str().unwrap().to_string();
let mut config = TracerConfigs::new(
kb_buffer_size,
false,
t.clone(),
Some(trace_mount_point.to_str().unwrap().to_string()),
None,
)
.unwrap();
let mut tempfiles = vec![buffer_size_file];
(
match t {
TracerType::Mem => {
let mountinfo_path =
create_fake_mountinfo_for(&base_dir.canonicalize().unwrap());
config.trace_events = vec![];
config.buffer_size_file_path = buffer_size_file_path;
config.mountinfo_path =
Some(mountinfo_path.path().to_str().unwrap().to_string());
tempfiles.push(mountinfo_path);
Box::new(MemTraceSubsystem::create_with_configs(config).unwrap())
}
},
tempfiles,
)
}
fn test_trace_of_type(tracing_type: TracerType) {
let test_base_dir = setup_test_dir();
let (_rf, files) = generate_cached_files_and_record(
Some(&test_base_dir),
true,
Some(page_size().unwrap() as u64),
);
let mut file = NamedTempFile::new().unwrap();
let (reader_fd, writer_fd) = pipe().unwrap();
let reader = File::from(reader_fd);
let mut writer = File::from(writer_fd);
let (tracer, _temp_files) = create_tracer(&test_base_dir, tracing_type.clone());
let mut files_iter = files.iter();
for line in generate_trace_line_for_open(
tracing_type.clone(),
files_iter.next().unwrap().0.path(),
5,
None,
) {
writeln!(file, "{}", line).unwrap();
}
file.sync_all().unwrap();
file.seek(std::io::SeekFrom::Start(0)).unwrap();
let (mut tracer, exit_evt) =
Tracer::create_with_config(file.reopen().unwrap(), reader, tracer).unwrap();
let thd = thread::spawn(move || tracer.trace(None));
for (index, file) in files_iter.enumerate() {
for line in generate_trace_line_for_open(tracing_type.clone(), file.0.path(), 10, None)
{
writeln!(&mut writer, "{}", line).unwrap();
}
if index == 0 {
                // This sleep emulates a delay in data arriving over the pipe. This shouldn't
                // cause flakes in a virtualized environment.
thread::sleep(Duration::from_secs(1));
}
}
thread::sleep(Duration::from_millis(100));
exit_evt.send(()).unwrap();
writeln!(&mut writer, "line").unwrap();
let tracer_rf = thd.join().unwrap().unwrap();
let mut found_count = 0;
for file in &files {
let mut found = false;
'inner: for inode in tracer_rf.inner.inode_map.values() {
for found_path in &inode.paths {
if found_path == file.0.path().canonicalize().unwrap().to_str().unwrap() {
found = true;
break 'inner;
}
}
}
if found {
found_count += 1;
} else {
println!("missing {:?}", file.0.path());
}
}
assert_eq!(found_count, files.len());
assert_eq!(tracer_rf.inner.records.len(), files.len() * RECORD_PER_FILE);
}
#[test]
fn test_trace_mem() {
test_trace_of_type(TracerType::Mem)
}
}