libsnapshot_cow: Prepare device to boot from pre-created snapshots

Two new APIs have been added:

1: BootFromSnapshotsWithoutSlotSwitch: This creates a new marker
which tells first-stage init to mount the partitions off snapshots.

We need this marker because there are a couple of places in the
boot-time snapshot mounting path where the marker is checked. However,
there is no change in the existing I/O path related to OTA.

2: PrepareDeviceToBootWithoutSnapshot: This deletes the marker so
that on the subsequent reboot the partitions are mounted without the
snapshots.

VTS tests cover both of these APIs. Additionally, when these
markers are present, new OTAs cannot be installed. All of this
is covered in VTS tests.

===========================================================

snapshotctl: General flow to apply and revert pre-created snapshots

1: To install the pre-created snapshots:

$snapshotctl map-snapshots <directory path containing snapshot patches>

Now the device is ready to boot from snapshots.

2: After device reboots, partitions are mounted off the snapshots. There
   is no snapshot-merge.

3: In order to go back to previous build:

$snapshotctl revert-snapshots

Now the device is ready to boot from base build.

4: After device reboots back to previous build, all the snapshot states
   and COW images are removed.

============================================

Additional commands:

To delete the pre-created snapshots:

$snapshotctl delete-snapshots

======================================

Tested it on Pixel 6 Pro between two builds which are ~24 hours apart.

1: Creating snapshots on a linux-host - ~4-6 seconds
2: Applying pre-created snapshots - ~10-15 seconds (includes intermediate
   transfer of patches to the device). This depends on the size of snapshot patches.
3: Device reboot - ~12-14 seconds.

Bug: 299011882
Test: 1: Apply pre-created snapshots
2: Reboot device: Verify new build
3: Apply OTA when partitions are mounted off snapshots and verify OTA
   fails.
4: Revert-snapshot and reboot.
5: Verify device goes back to base build.

Full OTA on Pixel. vts_libsnapshot_test

Change-Id: I36a72d973d8f70ae49773ebd45dd996fac22a4e3
Signed-off-by: Akilesh Kailash <akailash@google.com>
This commit is contained in:
Akilesh Kailash 2023-09-08 17:01:15 -07:00
parent b2b037d9f0
commit bcae6b5e60
4 changed files with 252 additions and 34 deletions

View file

@ -73,6 +73,9 @@ class ISnapshotMergeStats;
class SnapshotMergeStats;
class SnapshotStatus;
using std::chrono::duration_cast;
using namespace std::chrono_literals;
static constexpr const std::string_view kCowGroupName = "cow";
static constexpr char kVirtualAbCompressionProp[] = "ro.virtual_ab.compression.enabled";
@ -424,6 +427,7 @@ class SnapshotManager final : public ISnapshotManager {
FRIEND_TEST(SnapshotTest, MergeFailureCode);
FRIEND_TEST(SnapshotTest, NoMergeBeforeReboot);
FRIEND_TEST(SnapshotTest, UpdateBootControlHal);
FRIEND_TEST(SnapshotTest, BootSnapshotWithoutSlotSwitch);
FRIEND_TEST(SnapshotUpdateTest, AddPartition);
FRIEND_TEST(SnapshotUpdateTest, ConsistencyCheckResume);
FRIEND_TEST(SnapshotUpdateTest, DaemonTransition);
@ -436,6 +440,7 @@ class SnapshotManager final : public ISnapshotManager {
FRIEND_TEST(SnapshotUpdateTest, QueryStatusError);
FRIEND_TEST(SnapshotUpdateTest, SnapshotStatusFileWithoutCow);
FRIEND_TEST(SnapshotUpdateTest, SpaceSwapUpdate);
FRIEND_TEST(SnapshotUpdateTest, MapAllSnapshotsWithoutSlotSwitch);
friend class SnapshotTest;
friend class SnapshotUpdateTest;
friend class FlashAfterUpdateTest;
@ -456,7 +461,7 @@ class SnapshotManager final : public ISnapshotManager {
bool EnsureImageManager();
// Ensure we're connected to snapuserd.
bool EnsureSnapuserdConnected();
bool EnsureSnapuserdConnected(std::chrono::milliseconds timeout_ms = 10s);
// Helpers for first-stage init.
const std::unique_ptr<IDeviceInfo>& device() const { return device_; }
@ -549,6 +554,16 @@ class SnapshotManager final : public ISnapshotManager {
// Unmap and remove all known snapshots.
bool RemoveAllSnapshots(LockedFile* lock);
// Boot device off snapshots without slot switch
bool BootFromSnapshotsWithoutSlotSwitch();
// Remove kBootSnapshotsWithoutSlotSwitch so that device can boot
// without snapshots on the current slot
bool PrepareDeviceToBootWithoutSnapshot();
// Is the kBootSnapshotsWithoutSlotSwitch present
bool IsSnapshotWithoutSlotSwitch();
// List the known snapshot names.
bool ListSnapshots(LockedFile* lock, std::vector<std::string>* snapshots,
const std::string& suffix = "");
@ -663,6 +678,7 @@ class SnapshotManager final : public ISnapshotManager {
std::string GetRollbackIndicatorPath();
std::string GetForwardMergeIndicatorPath();
std::string GetOldPartitionMetadataPath();
std::string GetBootSnapshotsWithoutSlotSwitchPath();
const LpMetadata* ReadOldPartitionMetadata(LockedFile* lock);

View file

@ -83,6 +83,8 @@ using std::chrono::duration_cast;
using namespace std::chrono_literals;
using namespace std::string_literals;
static constexpr char kBootSnapshotsWithoutSlotSwitch[] =
"/metadata/ota/snapshot-boot-without-slot-switch";
static constexpr char kBootIndicatorPath[] = "/metadata/ota/snapshot-boot";
static constexpr char kRollbackIndicatorPath[] = "/metadata/ota/rollback-indicator";
static constexpr auto kUpdateStateCheckInterval = 2s;
@ -217,6 +219,12 @@ bool SnapshotManager::TryCancelUpdate(bool* needs_merge) {
auto file = LockExclusive();
if (!file) return false;
if (IsSnapshotWithoutSlotSwitch()) {
LOG(ERROR) << "Cannot cancel the snapshots as partitions are mounted off the snapshots on "
"current slot.";
return false;
}
UpdateState state = ReadUpdateState(file.get());
if (state == UpdateState::None) {
RemoveInvalidSnapshots(file.get());
@ -299,10 +307,9 @@ bool SnapshotManager::RemoveAllUpdateState(LockedFile* lock, const std::function
// - For ForwardMerge, FinishedSnapshotWrites asserts that the existence of the indicator
// matches the incoming update.
std::vector<std::string> files = {
GetSnapshotBootIndicatorPath(),
GetRollbackIndicatorPath(),
GetForwardMergeIndicatorPath(),
GetOldPartitionMetadataPath(),
GetSnapshotBootIndicatorPath(), GetRollbackIndicatorPath(),
GetForwardMergeIndicatorPath(), GetOldPartitionMetadataPath(),
GetBootSnapshotsWithoutSlotSwitchPath(),
};
for (const auto& file : files) {
RemoveFileIfExists(file);
@ -483,6 +490,32 @@ bool SnapshotManager::MapDmUserCow(LockedFile* lock, const std::string& name,
LOG(ERROR) << "Failed to retrieve base_sectors from Snapuserd";
return false;
}
} else if (IsSnapshotWithoutSlotSwitch()) {
// When snapshots are on current slot, we determine the size
// of block device based on the number of COW operations. We cannot
// use base device as it will be from older image.
size_t num_ops = 0;
uint64_t dev_sz = 0;
unique_fd fd(open(cow_file.c_str(), O_RDONLY | O_CLOEXEC));
if (fd < 0) {
PLOG(ERROR) << "Failed to open " << cow_file;
return false;
}
CowReader reader;
if (!reader.Parse(std::move(fd))) {
LOG(ERROR) << "Failed to parse cow " << cow_file;
return false;
}
const auto& header = reader.GetHeader();
if (header.prefix.major_version > 2) {
LOG(ERROR) << "COW format not supported";
return false;
}
num_ops = reader.get_num_total_data_ops();
dev_sz = (num_ops * header.block_size);
base_sectors = dev_sz >> 9;
} else {
// For userspace snapshots, the size of the base device is taken as the
// size of the dm-user block device. Since there is no pseudo mapping
@ -1479,6 +1512,10 @@ MergeFailureCode SnapshotManager::MergeSecondPhaseSnapshots(LockedFile* lock) {
return result;
}
// Path of the "boot from snapshots without slot switch" marker, formed from
// the basename of kBootSnapshotsWithoutSlotSwitch placed under metadata_dir_.
std::string SnapshotManager::GetBootSnapshotsWithoutSlotSwitchPath() {
    const std::string marker_name = android::base::Basename(kBootSnapshotsWithoutSlotSwitch);
    return metadata_dir_ + "/" + marker_name;
}
// Path of the snapshot boot indicator, formed from the basename of
// kBootIndicatorPath placed under metadata_dir_.
std::string SnapshotManager::GetSnapshotBootIndicatorPath() {
    const std::string indicator_name = android::base::Basename(kBootIndicatorPath);
    return metadata_dir_ + "/" + indicator_name;
}
@ -2120,6 +2157,10 @@ UpdateState SnapshotManager::GetUpdateState(double* progress) {
return state;
}
// Returns true when the "boot from snapshots without slot switch" marker file
// exists (existence check only; the contents are not read).
bool SnapshotManager::IsSnapshotWithoutSlotSwitch() {
    const std::string marker = GetBootSnapshotsWithoutSlotSwitchPath();
    const int rc = access(marker.c_str(), F_OK);
    return rc == 0;
}
bool SnapshotManager::UpdateUsesCompression() {
auto lock = LockShared();
if (!lock) return false;
@ -2212,6 +2253,13 @@ std::string SnapshotManager::GetGlobalRollbackIndicatorPath() {
}
bool SnapshotManager::NeedSnapshotsInFirstStageMount() {
if (IsSnapshotWithoutSlotSwitch()) {
if (GetCurrentSlot() != Slot::Source) {
LOG(ERROR) << "Snapshots marked to boot without slot switch; but slot is wrong";
return false;
}
return true;
}
// If we fail to read, we'll wind up using CreateLogicalPartitions, which
// will create devices that look like the old slot, except with extra
// content at the end of each device. This will confuse dm-verity, and
@ -2347,7 +2395,8 @@ bool SnapshotManager::MapPartitionWithSnapshot(LockedFile* lock,
// completed, live_snapshot_status is set to nullopt.
std::optional<SnapshotStatus> live_snapshot_status;
do {
if (!(params.partition->attributes & LP_PARTITION_ATTR_UPDATED)) {
if (!IsSnapshotWithoutSlotSwitch() &&
!(params.partition->attributes & LP_PARTITION_ATTR_UPDATED)) {
LOG(INFO) << "Detected re-flashing of partition, will skip snapshot: "
<< params.GetPartitionName();
break;
@ -2703,7 +2752,7 @@ bool SnapshotManager::UnmapUserspaceSnapshotDevice(LockedFile* lock,
// to unmap; hence, we can't be deleting the device
// as the table would be mounted off partitions and will fail.
if (snapshot_status.state() != SnapshotState::MERGE_COMPLETED) {
if (!DeleteDeviceIfExists(dm_user_name)) {
if (!DeleteDeviceIfExists(dm_user_name, 4000ms)) {
LOG(ERROR) << "Cannot unmap " << dm_user_name;
return false;
}
@ -3098,7 +3147,7 @@ bool SnapshotManager::EnsureImageManager() {
return true;
}
bool SnapshotManager::EnsureSnapuserdConnected() {
bool SnapshotManager::EnsureSnapuserdConnected(std::chrono::milliseconds timeout_ms) {
if (snapuserd_client_) {
return true;
}
@ -3107,7 +3156,7 @@ bool SnapshotManager::EnsureSnapuserdConnected() {
return false;
}
snapuserd_client_ = SnapuserdClient::Connect(kSnapuserdSocket, 10s);
snapuserd_client_ = SnapuserdClient::Connect(kSnapuserdSocket, timeout_ms);
if (!snapuserd_client_) {
LOG(ERROR) << "Unable to connect to snapuserd";
return false;
@ -4372,13 +4421,70 @@ std::string SnapshotManager::ReadSourceBuildFingerprint() {
bool SnapshotManager::IsUserspaceSnapshotUpdateInProgress() {
auto slot = GetCurrentSlot();
if (slot == Slot::Target) {
// Merge in-progress
if (IsSnapuserdRequired()) {
return true;
}
}
// Let's check more deeper to see if snapshots are mounted
auto lock = LockExclusive();
if (!lock) {
return false;
}
std::vector<std::string> snapshots;
if (!ListSnapshots(lock.get(), &snapshots)) {
return false;
}
for (const auto& snapshot : snapshots) {
// Active snapshot and daemon is alive
if (IsSnapshotDevice(snapshot) && EnsureSnapuserdConnected(2s)) {
return true;
}
}
return false;
}
bool SnapshotManager::BootFromSnapshotsWithoutSlotSwitch() {
auto lock = LockExclusive();
if (!lock) return false;
auto contents = device_->GetSlotSuffix();
// This is the indicator which tells first-stage init
// to boot from snapshots even though there was no slot-switch
auto boot_file = GetBootSnapshotsWithoutSlotSwitchPath();
if (!WriteStringToFileAtomic(contents, boot_file)) {
PLOG(ERROR) << "write failed: " << boot_file;
return false;
}
SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock.get());
update_status.set_state(UpdateState::Initiated);
update_status.set_userspace_snapshots(true);
update_status.set_using_snapuserd(true);
if (!WriteSnapshotUpdateStatus(lock.get(), update_status)) {
return false;
}
return true;
}
bool SnapshotManager::PrepareDeviceToBootWithoutSnapshot() {
auto lock = LockExclusive();
if (!lock) return false;
android::base::RemoveFileIfExists(GetSnapshotBootIndicatorPath());
android::base::RemoveFileIfExists(GetBootSnapshotsWithoutSlotSwitchPath());
SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock.get());
update_status.set_state(UpdateState::Cancelled);
if (!WriteSnapshotUpdateStatus(lock.get(), update_status)) {
return false;
}
return true;
}
} // namespace snapshot
} // namespace android

View file

@ -2559,6 +2559,56 @@ TEST_F(SnapshotUpdateTest, DaemonTransition) {
}
}
// Exercises booting off snapshots on the current slot: once the snapshots are
// written and the without-slot-switch marker is set, the snapshots must map,
// while starting a new update, cancelling and initiating a merge must all be
// refused. After the markers are removed, the snapshots are expected to stay
// active until they are explicitly unmapped.
TEST_F(SnapshotUpdateTest, MapAllSnapshotsWithoutSlotSwitch) {
MountMetadata();
AddOperationForPartitions();
// Execute the update.
ASSERT_TRUE(sm->BeginUpdate());
ASSERT_TRUE(sm->CreateUpdateSnapshots(manifest_));
// Only meaningful when the update uses userspace snapshots.
if (!sm->UpdateUsesUserSnapshots()) {
GTEST_SKIP() << "Test does not apply as UserSnapshots aren't enabled.";
}
ASSERT_TRUE(WriteSnapshots());
ASSERT_TRUE(sm->FinishedSnapshotWrites(false));
if (ShouldSkipLegacyMerging()) {
GTEST_SKIP() << "Skipping legacy merge test";
}
// Mark the indicator
ASSERT_TRUE(sm->BootFromSnapshotsWithoutSlotSwitch());
ASSERT_TRUE(sm->EnsureSnapuserdConnected());
sm->set_use_first_stage_snapuserd(true);
// With the marker present, first-stage mount must ask for snapshots.
ASSERT_TRUE(sm->NeedSnapshotsInFirstStageMount());
// Map snapshots
ASSERT_TRUE(sm->MapAllSnapshots(10s));
// New updates should fail
ASSERT_FALSE(sm->BeginUpdate());
// Snapshots cannot be cancelled
ASSERT_FALSE(sm->CancelUpdate());
// Merge cannot start
ASSERT_FALSE(sm->InitiateMerge());
// Read bytes back and verify they match the cache.
ASSERT_TRUE(IsPartitionUnchanged("sys_b"));
// Remove the indicators
ASSERT_TRUE(sm->PrepareDeviceToBootWithoutSnapshot());
// Ensure snapshots are still mounted
ASSERT_TRUE(sm->IsUserspaceSnapshotUpdateInProgress());
// Cleanup snapshots
ASSERT_TRUE(sm->UnmapAllSnapshots());
}
TEST_F(SnapshotUpdateTest, MapAllSnapshots) {
AddOperationForPartitions();
// Execute the update.

View file

@ -75,7 +75,11 @@ int Usage() {
" unmap-snapshots\n"
" Unmap all pre-created snapshots\n"
" delete-snapshots\n"
" Delete all pre-created snapshots\n";
" Delete all pre-created snapshots\n"
" revert-snapshots\n"
" Prepares devices to boot without snapshots on next boot.\n"
" This does not delete the snapshot. It only removes the indicators\n"
" so that first stage init will not mount from snapshots.\n";
return EX_USAGE;
}
@ -87,9 +91,11 @@ class MapSnapshots {
MapSnapshots(std::string path = "");
bool CreateSnapshotDevice(std::string& partition_name, std::string& patch);
bool InitiateThreadedSnapshotWrite(std::string& pname, std::string& snapshot_patch);
bool WaitForSnapshotWritesToComplete();
bool FinishSnapshotWrites();
bool UnmapCowImagePath(std::string& name);
bool DeleteCowImage(std::string& name);
bool DeleteSnapshots();
bool CleanupSnapshot() { return sm_->PrepareDeviceToBootWithoutSnapshot(); }
bool BeginUpdate();
private:
std::optional<std::string> GetCowImagePath(std::string& name);
@ -107,7 +113,24 @@ MapSnapshots::MapSnapshots(std::string path) {
exit(1);
}
snapshot_dir_path_ = path + "/";
}
bool MapSnapshots::BeginUpdate() {
lock_ = sm_->LockExclusive();
std::vector<std::string> snapshots;
sm_->ListSnapshots(lock_.get(), &snapshots);
if (!snapshots.empty()) {
// Snapshots are already present.
return true;
}
lock_ = nullptr;
if (!sm_->BeginUpdate()) {
LOG(ERROR) << "BeginUpdate failed";
return false;
}
lock_ = sm_->LockExclusive();
return true;
}
bool MapSnapshots::CreateSnapshotDevice(std::string& partition_name, std::string& patchfile) {
@ -130,6 +153,9 @@ bool MapSnapshots::CreateSnapshotDevice(std::string& partition_name, std::string
dev_sz &= ~(block_sz - 1);
SnapshotStatus status;
status.set_state(SnapshotState::CREATED);
status.set_using_snapuserd(true);
status.set_old_partition_size(0);
status.set_name(partition_name);
status.set_cow_file_size(dev_sz);
status.set_cow_partition_size(0);
@ -216,27 +242,33 @@ bool MapSnapshots::InitiateThreadedSnapshotWrite(std::string& pname, std::string
return true;
}
bool MapSnapshots::WaitForSnapshotWritesToComplete() {
bool MapSnapshots::FinishSnapshotWrites() {
bool ret = true;
for (auto& t : threads_) {
ret = t.get() && ret;
}
lock_ = nullptr;
if (ret) {
LOG(INFO) << "Pre-created snapshots successfully copied";
} else {
LOG(ERROR) << "Snapshot copy failed";
if (!sm_->FinishedSnapshotWrites(false)) {
return false;
}
return sm_->BootFromSnapshotsWithoutSlotSwitch();
}
return ret;
LOG(ERROR) << "Snapshot copy failed";
return false;
}
// Asks the snapshot manager to unmap the COW image called `name` and returns
// its result.
bool MapSnapshots::UnmapCowImagePath(std::string& name) {
    const bool unmapped = sm_->UnmapCowImage(name);
    return unmapped;
}
bool MapSnapshots::DeleteCowImage(std::string& name) {
if (!sm_->DeleteSnapshot(lock_.get(), name)) {
LOG(ERROR) << "Delete snapshot failed";
bool MapSnapshots::DeleteSnapshots() {
lock_ = sm_->LockExclusive();
if (!sm_->RemoveAllUpdateState(lock_.get())) {
LOG(ERROR) << "Remove All Update State failed";
return false;
}
return true;
@ -281,7 +313,8 @@ bool GetVerityPartitions(std::vector<std::string>& partitions) {
return true;
}
bool UnMapPrecreatedSnapshots(int, char**) {
bool UnMapPrecreatedSnapshots(int, char** argv) {
android::base::InitLogging(argv, &android::base::KernelLogger);
// Make sure we are root.
if (::getuid() != 0) {
LOG(ERROR) << "Not running as root. Try \"adb root\" first.";
@ -302,29 +335,36 @@ bool UnMapPrecreatedSnapshots(int, char**) {
return true;
}
bool DeletePrecreatedSnapshots(int, char**) {
// Command handler: removes the boot-from-snapshot indicators through
// MapSnapshots::CleanupSnapshot(). Returns false when not running as root or
// when the cleanup fails.
bool RemovePrecreatedSnapshots(int, char** argv) {
    android::base::InitLogging(argv, &android::base::KernelLogger);

    // Root is required.
    const bool is_root = (::getuid() == 0);
    if (!is_root) {
        LOG(ERROR) << "Not running as root. Try \"adb root\" first.";
        return false;
    }

    MapSnapshots snapshot;
    if (snapshot.CleanupSnapshot()) {
        return true;
    }
    LOG(ERROR) << "CleanupSnapshot failed";
    return false;
}
// Command handler for "delete-snapshots": deletes the pre-created snapshots
// through MapSnapshots::DeleteSnapshots(). Returns false when not running as
// root, otherwise the result of the deletion.
bool DeletePrecreatedSnapshots(int, char** argv) {
    android::base::InitLogging(argv, &android::base::KernelLogger);
    // Make sure we are root.
    if (::getuid() != 0) {
        LOG(ERROR) << "Not running as root. Try \"adb root\" first.";
        // Handlers report failure as `false`; a non-zero exit code such as
        // EXIT_FAILURE would convert to `true` here.
        return false;
    }

    MapSnapshots snapshot;
    return snapshot.DeleteSnapshots();
}
bool MapPrecreatedSnapshots(int argc, char** argv) {
android::base::InitLogging(argv, &android::base::StderrLogger);
android::base::InitLogging(argv, &android::base::KernelLogger);
// Make sure we are root.
if (::getuid() != 0) {
@ -365,6 +405,11 @@ bool MapPrecreatedSnapshots(int argc, char** argv) {
}
MapSnapshots cow(path);
if (!cow.BeginUpdate()) {
LOG(ERROR) << "BeginUpdate failed";
return false;
}
for (auto& pair : partitions) {
if (!cow.CreateSnapshotDevice(pair.first, pair.second)) {
LOG(ERROR) << "CreateSnapshotDevice failed for: " << pair.first;
@ -376,7 +421,7 @@ bool MapPrecreatedSnapshots(int argc, char** argv) {
}
}
return cow.WaitForSnapshotWritesToComplete();
return cow.FinishSnapshotWrites();
}
#ifdef SNAPSHOTCTL_USERDEBUG_OR_ENG
@ -508,6 +553,7 @@ static std::map<std::string, std::function<bool(int, char**)>> kCmdMap = {
{"map-snapshots", MapPrecreatedSnapshots},
{"unmap-snapshots", UnMapPrecreatedSnapshots},
{"delete-snapshots", DeletePrecreatedSnapshots},
{"revert-snapshots", RemovePrecreatedSnapshots},
// clang-format on
};