blob: 12259472c167dace9c47b53ae8105a03faf9db3c [file] [log] [blame]
// Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "update_engine/payload_state.h"
#include <algorithm>
#include <base/logging.h>
#include <base/stringprintf.h>
#include "update_engine/prefs.h"
#include "update_engine/utils.h"
using base::Time;
using base::TimeDelta;
using std::min;
using std::string;
namespace chromeos_update_engine {
// Upper bound, in days, on the exponential backoff applied between payload
// download attempts. Also used when loading persisted state to reject a
// backoff expiry time that lies implausibly far in the future.
static const uint32_t kMaxBackoffDays = 16;
// Width, in minutes, of the window used to randomize the retry time after a
// backoff. It is passed to utils::FuzzInt() as the range around 0, which is
// intended to spread retries by +/- 6 hours (NOTE(review): confirm that
// FuzzInt() centers the range on its first argument).
static const uint32_t kMaxBackoffFuzzMinutes = 12 * 60;
// Attaches |prefs| as the backing store for this object and reloads every
// persisted field from it. |prefs| must not be NULL (enforced by CHECK).
// Always returns true.
bool PayloadState::Initialize(PrefsInterface* prefs) {
  CHECK(prefs);
  prefs_ = prefs;

  // Restore the persisted fields. The URL index is loaded before the URL
  // failure count because setting the failure count logs the current index.
  LoadResponseSignature();
  LoadPayloadAttemptNumber();
  LoadUrlIndex();
  LoadUrlFailureCount();
  LoadBackoffExpiryTime();

  return true;
}
void PayloadState::SetResponse(const OmahaResponse& omaha_response) {
// Always store the latest response.
response_ = omaha_response;
// Check if the "signature" of this response (i.e. the fields we care about)
// has changed.
string new_response_signature = CalculateResponseSignature();
bool has_response_changed = (response_signature_ != new_response_signature);
// If the response has changed, we should persist the new signature and
// clear away all the existing state.
if (has_response_changed) {
LOG(INFO) << "Resetting all persisted state as this is a new response";
SetResponseSignature(new_response_signature);
ResetPersistedState();
return;
}
// This is the earliest point at which we can validate whether the URL index
// we loaded from the persisted state is a valid value. If the response
// hasn't changed but the URL index is invalid, it's indicative of some
// tampering of the persisted state.
if (url_index_ >= GetNumUrls()) {
LOG(INFO) << "Resetting all payload state as the url index seems to have "
"been tampered with";
ResetPersistedState();
return;
}
}
// Notification that the payload finished downloading: logs the event and
// bumps the payload attempt number (see IncrementPayloadAttemptNumber()).
void PayloadState::DownloadComplete() {
  LOG(INFO) << "Payload downloaded successfully";

  IncrementPayloadAttemptNumber();
}
// Notification that |count| bytes were received by a download operation.
//
// The URL failure count only penalizes *consecutive* failures, so receiving
// any non-zero number of bytes proves the current URL works and clears its
// failure count. More sophisticated failure-pattern detection may come later;
// for now even a single byte marks the URL as good until it again reaches the
// maximum number of consecutive failures.
void PayloadState::DownloadProgress(size_t count) {
  // Nothing to do if no bytes arrived, or if there is no failure to forgive.
  const bool received_bytes = (count != 0);
  if (!received_bytes || GetUrlFailureCount() == 0)
    return;

  LOG(INFO) << "Resetting failure count of Url" << GetUrlIndex()
            << " to 0 as we received " << count << " bytes successfully";
  SetUrlFailureCount(0);
}
// Updates the URL index / URL failure count in response to an update attempt
// that failed with |error|. The error is first reduced to its base code via
// utils::GetBaseErrorCode() and then classified by the switch below into one
// of four buckets: advance to the next URL, count a failure against the
// current URL, leave the state untouched, or warn about an unexpected code.
// Nothing is changed if no URLs are known yet.
void PayloadState::UpdateFailed(ActionExitCode error) {
ActionExitCode base_error = utils::GetBaseErrorCode(error);
LOG(INFO) << "Updating payload state for error code: " << base_error
<< " (" << utils::CodeToString(base_error) << ")";
if (GetNumUrls() == 0) {
// This means we got this error even before we got a valid Omaha response.
// So we should not advance the url_index_ in such cases.
LOG(INFO) << "Ignoring failures until we get a valid Omaha response.";
return;
}
switch (base_error) {
// Errors which are good indicators of a problem with a particular URL or
// the protocol used in the URL or entities in the communication channel
// (e.g. proxies). We should try the next available URL in the next update
// check to quickly recover from these errors.
case kActionCodePayloadHashMismatchError:
case kActionCodePayloadSizeMismatchError:
case kActionCodeDownloadPayloadVerificationError:
case kActionCodeDownloadPayloadPubKeyVerificationError:
case kActionCodeSignedDeltaPayloadExpectedError:
case kActionCodeDownloadInvalidMetadataMagicString:
case kActionCodeDownloadSignatureMissingInManifest:
case kActionCodeDownloadManifestParseError:
case kActionCodeDownloadMetadataSignatureError:
case kActionCodeDownloadMetadataSignatureVerificationError:
case kActionCodeDownloadMetadataSignatureMismatch:
case kActionCodeDownloadOperationHashVerificationError:
case kActionCodeDownloadOperationExecutionError:
case kActionCodeDownloadOperationHashMismatch:
case kActionCodeDownloadInvalidMetadataSize:
case kActionCodeDownloadInvalidMetadataSignature:
case kActionCodeDownloadOperationHashMissingError:
case kActionCodeDownloadMetadataSignatureMissingError:
IncrementUrlIndex();
break;
// Errors which seem to be just transient network/communication related
// failures and do not indicate any inherent problem with the URL itself.
// So, we should keep the current URL but just increment the failure count
// to give it more chances. This way we maximize our chances of downloading
// from the URLs that appear earlier in the response (because downloading
// from a local server URL that appears earlier in a response is preferable
// to downloading from the next URL, which could be an internet URL and
// thus could be more expensive). Note that IncrementFailureCount() itself
// moves on to the next URL once the per-URL failure limit is reached.
case kActionCodeError:
case kActionCodeDownloadTransferError:
case kActionCodeDownloadWriteError:
case kActionCodeDownloadStateInitializationError:
case kActionCodeOmahaErrorInHTTPResponse: // Aggregate code for HTTP errors.
IncrementFailureCount();
break;
// Errors which are not specific to a URL and hence shouldn't result in
// the URL being penalized. This can happen in two cases:
// 1. We haven't started downloading anything: These errors don't cost us
// anything in terms of actual payload bytes, so we should just do the
// regular retries at the next update check.
// 2. We have successfully downloaded the payload: In this case, the
// payload attempt number would have been incremented and would take care
// of the backoff at the next update check.
// In either case, there's no need to update URL index or failure count.
case kActionCodeOmahaRequestError:
case kActionCodeOmahaResponseHandlerError:
case kActionCodePostinstallRunnerError:
case kActionCodeFilesystemCopierError:
case kActionCodeInstallDeviceOpenError:
case kActionCodeKernelDeviceOpenError:
case kActionCodeDownloadNewPartitionInfoError:
case kActionCodeNewRootfsVerificationError:
case kActionCodeNewKernelVerificationError:
case kActionCodePostinstallBootedFromFirmwareB:
case kActionCodeOmahaRequestEmptyResponseError:
case kActionCodeOmahaRequestXMLParseError:
case kActionCodeOmahaResponseInvalid:
case kActionCodeOmahaUpdateIgnoredPerPolicy:
case kActionCodeOmahaUpdateDeferredPerPolicy:
case kActionCodeOmahaUpdateDeferredForBackoff:
case kActionCodePostinstallPowerwashError:
LOG(INFO) << "Not incrementing URL index or failure count for this error";
break;
case kActionCodeSuccess: // success code
case kActionCodeSetBootableFlagError: // unused
case kActionCodeUmaReportedMax: // not an error code
case kActionCodeOmahaRequestHTTPResponseBase: // aggregated already
case kActionCodeDevModeFlag: // not an error code
case kActionCodeResumedFlag: // not an error code
case kActionCodeTestImageFlag: // not an error code
case kActionCodeTestOmahaUrlFlag: // not an error code
case kSpecialFlags: // not an error code
// These shouldn't happen. Enumerating these explicitly here so that we
// can let the compiler warn about new error codes that are added to
// action_processor.h but not added here.
LOG(WARNING) << "Unexpected error code for UpdateFailed";
break;
// Note: Not adding a default here so as to let the compiler warn us of
// any new enums that were added in the .h but not listed in this switch.
}
}
// Decides whether the next payload download must be postponed because a
// backoff period is still in effect. Returns true only when backoff applies
// to this response/build and the stored expiry time has not yet passed; every
// exemption below logs its reason and returns false.
bool PayloadState::ShouldBackoffDownload() {
  // Exemption 1: the response explicitly turns backoff off.
  if (response_.disable_payload_backoff) {
    LOG(INFO) << "Payload backoff logic is disabled. "
                 "Can proceed with the download";
    return false;
  }

  // Exemption 2: delta payloads. When a delta fails we want to fall back to
  // the (more likely to succeed) full payload quickly, and an exponential
  // backoff would greatly slow that fallback down.
  if (response_.is_delta_payload) {
    LOG(INFO) << "No backoffs for delta payloads. "
              << "Can proceed with the download";
    return false;
  }

  // Exemption 3: non-official builds. Testing and development must not be
  // delayed or broken by backoffs.
  if (!utils::IsOfficialBuild()) {
    LOG(INFO) << "No backoffs for test/dev images. "
              << "Can proceed with the download";
    return false;
  }

  // Exemption 4: no expiry time was ever recorded.
  if (backoff_expiry_time_.is_null()) {
    LOG(INFO) << "No backoff expiry time has been set. "
              << "Can proceed with the download";
    return false;
  }

  // Backoff applies; all that matters now is whether it is still running.
  const bool still_backing_off = (backoff_expiry_time_ >= Time::Now());
  if (still_backing_off) {
    LOG(INFO) << "Cannot proceed with downloads as we need to backoff until "
              << utils::ToString(backoff_expiry_time_);
    return true;
  }

  LOG(INFO) << "The backoff expiry time ("
            << utils::ToString(backoff_expiry_time_)
            << ") has elapsed. Can proceed with the download";
  return false;
}
// Bumps the persisted payload attempt number by one and recomputes the
// backoff expiry time from the new value. Delta payloads are exempt: their
// attempt number is left untouched.
void PayloadState::IncrementPayloadAttemptNumber() {
  if (!response_.is_delta_payload) {
    LOG(INFO) << "Incrementing the payload attempt number";
    SetPayloadAttemptNumber(GetPayloadAttemptNumber() + 1);
    UpdateBackoffExpiryTime();
    return;
  }

  LOG(INFO) << "Not incrementing payload attempt number for delta payloads";
}
void PayloadState::IncrementUrlIndex() {
uint32_t next_url_index = GetUrlIndex() + 1;
if (next_url_index < GetNumUrls()) {
LOG(INFO) << "Incrementing the URL index for next attempt";
SetUrlIndex(next_url_index);
} else {
LOG(INFO) << "Resetting the current URL index (" << GetUrlIndex() << ") to "
<< "0 as we only have " << GetNumUrls() << " URL(s)";
SetUrlIndex(0);
IncrementPayloadAttemptNumber();
}
// Whenever we update the URL index, we should also clear the URL failure
// count so we can start over fresh for the new URL.
SetUrlFailureCount(0);
}
void PayloadState::IncrementFailureCount() {
uint32_t next_url_failure_count = GetUrlFailureCount() + 1;
if (next_url_failure_count < response_.max_failure_count_per_url) {
LOG(INFO) << "Incrementing the URL failure count";
SetUrlFailureCount(next_url_failure_count);
} else {
LOG(INFO) << "Reached max number of failures for Url" << GetUrlIndex()
<< ". Trying next available URL";
IncrementUrlIndex();
}
}
void PayloadState::UpdateBackoffExpiryTime() {
if (response_.disable_payload_backoff) {
LOG(INFO) << "Resetting backoff expiry time as payload backoff is disabled";
SetBackoffExpiryTime(Time());
return;
}
if (GetPayloadAttemptNumber() == 0) {
SetBackoffExpiryTime(Time());
return;
}
// Since we're doing left-shift below, make sure we don't shift more
// than this. E.g. if uint32_t is 4-bytes, don't left-shift more than 30 bits,
// since we don't expect value of kMaxBackoffDays to be more than 100 anyway.
uint32_t num_days = 1; // the value to be shifted.
const uint32_t kMaxShifts = (sizeof(num_days) * 8) - 2;
// Normal backoff days is 2 raised to (payload_attempt_number - 1).
// E.g. if payload_attempt_number is over 30, limit power to 30.
uint32_t power = min(GetPayloadAttemptNumber() - 1, kMaxShifts);
// The number of days is the minimum of 2 raised to (payload_attempt_number
// - 1) or kMaxBackoffDays.
num_days = min(num_days << power, kMaxBackoffDays);
// We don't want all retries to happen exactly at the same time when
// retrying after backoff. So add some random minutes to fuzz.
int fuzz_minutes = utils::FuzzInt(0, kMaxBackoffFuzzMinutes);
TimeDelta next_backoff_interval = TimeDelta::FromDays(num_days) +
TimeDelta::FromMinutes(fuzz_minutes);
LOG(INFO) << "Incrementing the backoff expiry time by "
<< utils::FormatTimeDelta(next_backoff_interval);
SetBackoffExpiryTime(Time::Now() + next_backoff_interval);
}
// Returns all persisted counters to their initial values: payload attempt
// number, URL index and URL failure count become 0, and the backoff expiry
// time is cleared as a consequence of the attempt number being 0.
void PayloadState::ResetPersistedState() {
  SetPayloadAttemptNumber(0);
  SetUrlIndex(0);
  SetUrlFailureCount(0);

  // With the attempt number now at 0, this clears the backoff expiry time.
  UpdateBackoffExpiryTime();
}
// Builds a textual "signature" of the current response from the fields whose
// change should invalidate the persisted payload state: the list of payload
// URLs, payload size and hash, metadata size and signature, the delta flag,
// the per-URL failure limit and the backoff-disable flag.
//
// Returns the signature as a multi-line string. The text layout is kept
// exactly as before so that signatures already persisted keep matching.
//
// Every variadic argument is passed with a type that matches its conversion
// specifier: size_t values use %zu, the 64-bit quantities are cast to
// unsigned long long for %llu, and the flag/count fields are cast to int for
// %d. Passing a size_t to %d (as was done previously) is undefined behavior
// on platforms where size_t is wider than int.
string PayloadState::CalculateResponseSignature() {
  const size_t num_urls = response_.payload_urls.size();

  string response_sign = StringPrintf("NumURLs = %zu\n", num_urls);

  for (size_t i = 0; i < num_urls; i++) {
    response_sign += StringPrintf("Url%zu = %s\n",
                                  i, response_.payload_urls[i].c_str());
  }

  response_sign += StringPrintf(
      "Payload Size = %llu\n"
      "Payload Sha256 Hash = %s\n"
      "Metadata Size = %llu\n"
      "Metadata Signature = %s\n"
      "Is Delta Payload = %d\n"
      "Max Failure Count Per Url = %d\n"
      "Disable Payload Backoff = %d\n",
      static_cast<unsigned long long>(response_.size),
      response_.hash.c_str(),
      static_cast<unsigned long long>(response_.metadata_size),
      response_.metadata_signature.c_str(),
      static_cast<int>(response_.is_delta_payload),
      static_cast<int>(response_.max_failure_count_per_url),
      static_cast<int>(response_.disable_payload_backoff));

  return response_sign;
}
void PayloadState::LoadResponseSignature() {
CHECK(prefs_);
string stored_value;
if (prefs_->Exists(kPrefsCurrentResponseSignature) &&
prefs_->GetString(kPrefsCurrentResponseSignature, &stored_value)) {
SetResponseSignature(stored_value);
}
}
// Makes |response_signature| the current signature: stores it in memory, logs
// it, and writes it to the prefs store.
void PayloadState::SetResponseSignature(string response_signature) {
  CHECK(prefs_);

  response_signature_ = response_signature;
  LOG(INFO) << "Current Response Signature = \n" << response_signature_;

  prefs_->SetString(kPrefsCurrentResponseSignature, response_signature_);
}
void PayloadState::LoadPayloadAttemptNumber() {
CHECK(prefs_);
int64_t stored_value;
if (prefs_->Exists(kPrefsPayloadAttemptNumber) &&
prefs_->GetInt64(kPrefsPayloadAttemptNumber, &stored_value)) {
if (stored_value < 0) {
LOG(ERROR) << "Invalid payload attempt number (" << stored_value
<< ") in persisted state. Defaulting to 0";
stored_value = 0;
}
SetPayloadAttemptNumber(stored_value);
}
}
// Makes |payload_attempt_number| the current attempt number: stores it in
// memory, logs it, and writes it to the prefs store.
void PayloadState::SetPayloadAttemptNumber(uint32_t payload_attempt_number) {
  CHECK(prefs_);

  payload_attempt_number_ = payload_attempt_number;
  LOG(INFO) << "Payload Attempt Number = " << payload_attempt_number_;

  prefs_->SetInt64(kPrefsPayloadAttemptNumber, payload_attempt_number_);
}
void PayloadState::LoadUrlIndex() {
CHECK(prefs_);
int64_t stored_value;
if (prefs_->Exists(kPrefsCurrentUrlIndex) &&
prefs_->GetInt64(kPrefsCurrentUrlIndex, &stored_value)) {
// We only check for basic sanity value here. Detailed check will be
// done in SetResponse once the first response comes in.
if (stored_value < 0) {
LOG(ERROR) << "Invalid URL Index (" << stored_value
<< ") in persisted state. Defaulting to 0";
stored_value = 0;
}
SetUrlIndex(stored_value);
}
}
// Makes |url_index| the current URL index: stores it in memory, logs it, and
// writes it to the prefs store.
void PayloadState::SetUrlIndex(uint32_t url_index) {
  CHECK(prefs_);

  url_index_ = url_index;
  LOG(INFO) << "Current URL Index = " << url_index_;

  prefs_->SetInt64(kPrefsCurrentUrlIndex, url_index_);
}
void PayloadState::LoadUrlFailureCount() {
CHECK(prefs_);
int64_t stored_value;
if (prefs_->Exists(kPrefsCurrentUrlFailureCount) &&
prefs_->GetInt64(kPrefsCurrentUrlFailureCount, &stored_value)) {
if (stored_value < 0) {
LOG(ERROR) << "Invalid URL Failure count (" << stored_value
<< ") in persisted state. Defaulting to 0";
stored_value = 0;
}
SetUrlFailureCount(stored_value);
}
}
// Makes |url_failure_count| the failure count of the current URL: stores it
// in memory, logs it together with the current URL index, and writes it to
// the prefs store.
void PayloadState::SetUrlFailureCount(uint32_t url_failure_count) {
  CHECK(prefs_);

  url_failure_count_ = url_failure_count;
  LOG(INFO) << "Current URL (Url" << GetUrlIndex()
            << ")'s Failure Count = " << url_failure_count_;

  prefs_->SetInt64(kPrefsCurrentUrlFailureCount, url_failure_count_);
}
void PayloadState::LoadBackoffExpiryTime() {
CHECK(prefs_);
int64_t stored_value;
if (!prefs_->Exists(kPrefsBackoffExpiryTime))
return;
if (!prefs_->GetInt64(kPrefsBackoffExpiryTime, &stored_value))
return;
Time stored_time = Time::FromInternalValue(stored_value);
if (stored_time > Time::Now() + TimeDelta::FromDays(kMaxBackoffDays)) {
LOG(ERROR) << "Invalid backoff expiry time ("
<< utils::ToString(stored_time)
<< ") in persisted state. Resetting.";
stored_time = Time();
}
SetBackoffExpiryTime(stored_time);
}
// Makes |new_time| the current backoff expiry time: stores it in memory, logs
// it, and writes its internal integer representation to the prefs store.
void PayloadState::SetBackoffExpiryTime(const Time& new_time) {
  CHECK(prefs_);

  backoff_expiry_time_ = new_time;
  LOG(INFO) << "Backoff Expiry Time = "
            << utils::ToString(backoff_expiry_time_);

  prefs_->SetInt64(kPrefsBackoffExpiryTime,
                   backoff_expiry_time_.ToInternalValue());
}
} // namespace chromeos_update_engine