Write CowReplace blocks in batch

snapuserd is optimizing merge performance, sending blocks in batch will
help.

Test: th
Change-Id: Ie358137768b1f0d7e03530e5f1ddb57e40e183ed
diff --git a/common/cow_operation_convert.cc b/common/cow_operation_convert.cc
index 2564abf..a8f7541 100644
--- a/common/cow_operation_convert.cc
+++ b/common/cow_operation_convert.cc
@@ -24,6 +24,23 @@
 
 namespace chromeos_update_engine {
 
+namespace {
+
+bool IsConsecutive(const CowOperation& op1, const CowOperation& op2) {
+  return op1.op == op2.op && op1.dst_block + op1.block_count == op2.dst_block &&
+         op1.src_block + op1.block_count == op2.src_block;
+}
+
+void push_back(std::vector<CowOperation>* converted, const CowOperation& op) {
+  if (!converted->empty() && IsConsecutive(converted->back(), op)) {
+    converted->back().block_count++;
+  } else {
+    converted->push_back(op);
+  }
+}
+
+}  // namespace
+
 std::vector<CowOperation> ConvertToCowOperations(
     const ::google::protobuf::RepeatedPtrField<
         ::chromeos_update_engine::InstallOperation>& operations,
@@ -31,7 +48,6 @@
         merge_operations) {
   ExtentRanges merge_extents;
   std::vector<CowOperation> converted;
-  ExtentRanges modified_extents;
 
   // We want all CowCopy ops to be done first, before any COW_REPLACE happen.
   // Therefore we add these ops in 2 separate loops. This is because during
@@ -53,8 +69,7 @@
     for (uint64_t i = src_extent.num_blocks(); i > 0; i--) {
       auto src_block = src_extent.start_block() + i - 1;
       auto dst_block = dst_extent.start_block() + i - 1;
-      converted.push_back({CowOperation::CowCopy, src_block, dst_block});
-      modified_extents.AddBlock(dst_block);
+      converted.push_back({CowOperation::CowCopy, src_block, dst_block, 1});
     }
   }
   // COW_REPLACE are added after COW_COPY, because replace might modify blocks
@@ -68,10 +83,11 @@
     BlockIterator it1{src_extents};
     BlockIterator it2{dst_extents};
     while (!it1.is_end() && !it2.is_end()) {
-      auto src_block = *it1;
-      auto dst_block = *it2;
+      const auto src_block = *it1;
+      const auto dst_block = *it2;
       if (!merge_extents.ContainsBlock(dst_block)) {
-        converted.push_back({CowOperation::CowReplace, src_block, dst_block});
+        push_back(&converted,
+                  {CowOperation::CowReplace, src_block, dst_block, 1});
       }
       ++it1;
       ++it2;
diff --git a/common/cow_operation_convert.h b/common/cow_operation_convert.h
index c0543f7..60c820f 100644
--- a/common/cow_operation_convert.h
+++ b/common/cow_operation_convert.h
@@ -30,8 +30,9 @@
     CowReplace = android::snapshot::kCowReplaceOp,
   };
   Type op;
-  uint64_t src_block;
-  uint64_t dst_block;
+  uint64_t src_block{};
+  uint64_t dst_block{};
+  uint64_t block_count{1};
 };
 
 // Convert SOURCE_COPY operations in `operations` list to a list of
diff --git a/common/cow_operation_convert_unittest.cc b/common/cow_operation_convert_unittest.cc
index 93173fe..d5b522a 100644
--- a/common/cow_operation_convert_unittest.cc
+++ b/common/cow_operation_convert_unittest.cc
@@ -46,7 +46,8 @@
 }
 
 std::ostream& operator<<(std::ostream& out, const CowOperation& c) {
-  out << "{" << c.op << ", " << c.src_block << ", " << c.dst_block << "}";
+  out << "{" << c.op << ", " << c.src_block << ", " << c.dst_block << ", "
+      << c.block_count << "}";
   return out;
 }
 
@@ -63,18 +64,24 @@
     ExtentRanges modified_extents;
     for (auto&& cow_op : cow_ops) {
       if (cow_op.op == CowOperation::CowCopy) {
-        EXPECT_TRUE(src_extent_set.ContainsBlock(cow_op.src_block));
-        // converted operations should be conflict free.
-        EXPECT_FALSE(modified_extents.ContainsBlock(cow_op.src_block))
-            << "SOURCE_COPY operation " << cow_op
-            << " read from a modified block";
+        for (size_t i = 0; i < cow_op.block_count; i++) {
+          ASSERT_TRUE(src_extent_set.ContainsBlock(cow_op.src_block + i));
+          // converted operations should be conflict free.
+          ASSERT_FALSE(modified_extents.ContainsBlock(cow_op.src_block + i))
+              << "SOURCE_COPY operation " << cow_op
+              << " read from a modified block";
+        }
       }
-      EXPECT_TRUE(dst_extent_set.ContainsBlock(cow_op.dst_block));
-      dst_extent_set.SubtractExtent(ExtentForRange(cow_op.dst_block, 1));
-      modified_extents.AddBlock(cow_op.dst_block);
+      for (size_t i = 0; i < cow_op.block_count; i++) {
+        ASSERT_TRUE(dst_extent_set.ContainsBlock(cow_op.dst_block + i));
+      }
+      dst_extent_set.SubtractExtent(
+          ExtentForRange(cow_op.dst_block, cow_op.block_count));
+      modified_extents.AddExtent(
+          ExtentForRange(cow_op.dst_block, cow_op.block_count));
     }
     // The generated CowOps should cover all extents in InstallOps.
-    EXPECT_EQ(dst_extent_set.blocks(), 0UL);
+    ASSERT_EQ(dst_extent_set.blocks(), 0UL);
     // It's possible that src_extent_set is non-empty, because some operations
     // will be converted to CowReplace, and we don't count the source extent for
     // those.
@@ -233,4 +240,61 @@
   VerifyCowMergeOp(cow_ops);
 }
 
+TEST_F(CowOperationConvertTest, CowReplaceCoalesce) {
+  AddOperation(
+      &operations_, InstallOperation::SOURCE_COPY, {{30, 10}}, {{0, 10}});
+
+  AddMergeOperation(
+      &merge_operations_, CowMergeOperation::COW_COPY, {30, 1}, {0, 1});
+  AddMergeOperation(
+      &merge_operations_, CowMergeOperation::COW_COPY, {31, 1}, {1, 1});
+  AddMergeOperation(
+      &merge_operations_, CowMergeOperation::COW_COPY, {32, 1}, {2, 1});
+
+  auto cow_ops = ConvertToCowOperations(operations_, merge_operations_);
+  for (const auto& op : cow_ops) {
+    LOG(INFO) << op;
+  }
+  ASSERT_EQ(cow_ops.size(), 4UL);
+  // Expect 3 COW_COPY and 1 COW_REPLACE
+  ASSERT_EQ(std::count_if(cow_ops.begin(),
+                          cow_ops.end(),
+                          [](auto&& cow_op) {
+                            return cow_op.op == CowOperation::CowCopy;
+                          }),
+            3);
+  ASSERT_EQ(std::count_if(cow_ops.begin(),
+                          cow_ops.end(),
+                          [](auto&& cow_op) {
+                            return cow_op.op == CowOperation::CowReplace;
+                          }),
+            1);
+  VerifyCowMergeOp(cow_ops);
+}
+
+TEST_F(CowOperationConvertTest, CowReplaceDifferenceSrc) {
+  AddOperation(
+      &operations_, InstallOperation::SOURCE_COPY, {{30, 10}}, {{0, 10}});
+  AddOperation(
+      &operations_, InstallOperation::SOURCE_COPY, {{100, 10}}, {{10, 10}});
+
+  auto cow_ops = ConvertToCowOperations(operations_, merge_operations_);
+  for (const auto& op : cow_ops) {
+    LOG(INFO) << op;
+  }
+  ASSERT_EQ(cow_ops.size(), 2UL);
+  // |src_block| matters. Even for ReplaceOperation.
+  // As update_engine still need to know where the data come from.
+  ASSERT_EQ(cow_ops[0].op, CowOperation::CowReplace);
+  ASSERT_EQ(cow_ops[0].src_block, 30UL);
+  ASSERT_EQ(cow_ops[0].dst_block, 0UL);
+  ASSERT_EQ(cow_ops[0].block_count, 10UL);
+
+  ASSERT_EQ(cow_ops[1].op, CowOperation::CowReplace);
+  ASSERT_EQ(cow_ops[1].src_block, 100UL);
+  ASSERT_EQ(cow_ops[1].dst_block, 10UL);
+  ASSERT_EQ(cow_ops[1].block_count, 10UL);
+  VerifyCowMergeOp(cow_ops);
+}
+
 }  // namespace chromeos_update_engine
diff --git a/payload_consumer/vabc_partition_writer.cc b/payload_consumer/vabc_partition_writer.cc
index d827fad..ba0f484 100644
--- a/payload_consumer/vabc_partition_writer.cc
+++ b/payload_consumer/vabc_partition_writer.cc
@@ -193,30 +193,36 @@
     const std::vector<CowOperation>& converted,
     ICowWriter* cow_writer,
     FileDescriptorPtr source_fd) {
-  std::vector<uint8_t> buffer(block_size);
-
   for (const auto& cow_op : converted) {
+    std::vector<uint8_t> buffer;
     switch (cow_op.op) {
       case CowOperation::CowCopy:
         if (cow_op.src_block == cow_op.dst_block) {
           continue;
         }
-        TEST_AND_RETURN_FALSE(
-            cow_writer->AddCopy(cow_op.dst_block, cow_op.src_block));
+        // Add blocks in reverse order, because snapused specifically prefers
+        // this ordering. Since we already eliminated all self-overlapping
+        // SOURCE_COPY during delta generation, this should be safe to do.
+        for (size_t i = cow_op.block_count; i > 0; i--) {
+          TEST_AND_RETURN_FALSE(cow_writer->AddCopy(cow_op.dst_block + i - 1,
+                                                    cow_op.src_block + i - 1));
+        }
         break;
       case CowOperation::CowReplace:
+        buffer.resize(block_size * cow_op.block_count);
         ssize_t bytes_read = 0;
         TEST_AND_RETURN_FALSE(utils::ReadAll(source_fd,
                                              buffer.data(),
-                                             block_size,
+                                             block_size * cow_op.block_count,
                                              cow_op.src_block * block_size,
                                              &bytes_read));
-        if (bytes_read <= 0 || static_cast<size_t>(bytes_read) != block_size) {
+        if (bytes_read <= 0 ||
+            static_cast<size_t>(bytes_read) != buffer.size()) {
           LOG(ERROR) << "source_fd->Read failed: " << bytes_read;
           return false;
         }
         TEST_AND_RETURN_FALSE(cow_writer->AddRawBlocks(
-            cow_op.dst_block, buffer.data(), block_size));
+            cow_op.dst_block, buffer.data(), buffer.size()));
         break;
     }
   }