blob: fc5b7eae57e3cbd8edf6217f86bbc448906e66cd [file] [log] [blame]
/*
* Copyright (C) 2011 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef ART_SRC_VERIFIER_METHOD_VERIFIER_H_
#define ART_SRC_VERIFIER_METHOD_VERIFIER_H_
#include <deque>
#include <limits>
#include <set>
#include <vector>
#include "casts.h"
#include "compiler.h"
#include "dex_file.h"
#include "dex_instruction.h"
#include "macros.h"
#include "object.h"
#include "reg_type.h"
#include "reg_type_cache.h"
#include "register_line.h"
#include "safe_map.h"
#include "stl_util.h"
#include "UniquePtr.h"
namespace art {
struct ReferenceMap2Visitor;
#if defined(ART_USE_LLVM_COMPILER)
namespace compiler_llvm {
class InferredRegCategoryMap;
} // namespace compiler_llvm
#endif
namespace verifier {
class MethodVerifier;
class InsnFlags;
class PcToReferenceMap;
/*
* Set this to enable dead code scanning. This is not required, but it's very useful when testing
* changes to the verifier (to make sure we're not skipping over stuff). The only reason not to do
* it is that it slightly increases the time required to perform verification.
*
* The macro expands to kIsDebugBuild, which is declared outside this header, so the scan is
* switched on or off together with that constant (presumably: on in debug builds only).
*/
#define DEAD_CODE_SCAN kIsDebugBuild
/*
 * Kind of invocation used to reach a method.
 *
 * "Direct" and "virtual" methods live in separate lists. The invocation kind selects the list
 * that is searched and decides whether the search continues into superclasses.
 *
 * (<clinit>, <init>, and methods declared "private" or "static" are kept in the "direct" list;
 * everything else is kept in the "virtual" list.)
 */
enum MethodType {
  METHOD_UNKNOWN   = 0,
  METHOD_DIRECT    = 1,  // <init>, private
  METHOD_STATIC    = 2,  // static
  METHOD_VIRTUAL   = 3,  // virtual, super
  METHOD_INTERFACE = 4   // interface
};
/*
 * Problems that verification can report.
 *
 * VERIFY_ERROR_BAD_CLASS_SOFT and VERIFY_ERROR_BAD_CLASS_HARD both reject the entire class.
 * The soft variant marks a failure that can potentially be corrected, so the verifier tries
 * again at runtime. The hard variant marks a failure that can't be corrected and leaves the
 * class uncompiled. Every other value denotes a verification error that causes bytecode to be
 * rewritten so that it fails at runtime.
 */
enum VerifyError {
  VERIFY_ERROR_NONE           = 0,   // No error; must be zero.
  VERIFY_ERROR_BAD_CLASS_HARD = 1,   // VerifyError; hard error that skips compilation.
  VERIFY_ERROR_BAD_CLASS_SOFT = 2,   // VerifyError; soft error that verifies again at runtime.
  VERIFY_ERROR_NO_CLASS       = 3,   // NoClassDefFoundError.
  VERIFY_ERROR_NO_FIELD       = 4,   // NoSuchFieldError.
  VERIFY_ERROR_NO_METHOD      = 5,   // NoSuchMethodError.
  VERIFY_ERROR_ACCESS_CLASS   = 6,   // IllegalAccessError.
  VERIFY_ERROR_ACCESS_FIELD   = 7,   // IllegalAccessError.
  VERIFY_ERROR_ACCESS_METHOD  = 8,   // IllegalAccessError.
  VERIFY_ERROR_CLASS_CHANGE   = 9,   // IncompatibleClassChangeError.
  VERIFY_ERROR_INSTANTIATION  = 10,  // InstantiationError.
};
// Stream insertion for VerifyError values; only declared here, the definition lives elsewhere.
std::ostream& operator<<(std::ostream& os, const VerifyError& rhs);
/*
 * Kind of reference carried by the instruction that produced a verify error. The same error
 * can stem from different reference kinds (e.g. VERIFY_ERROR_ACCESS_CLASS could come from a
 * method, field, or class reference).
 *
 * Every value must fit in two bits.
 */
enum VerifyErrorRefType {
  VERIFY_ERROR_REF_CLASS  = 0x0,  // binary 00
  VERIFY_ERROR_REF_FIELD  = 0x1,  // binary 01
  VERIFY_ERROR_REF_METHOD = 0x2,  // binary 10
};
// Shift amount associated with VerifyErrorRefType.
// NOTE(review): presumably the bit position at which the two-bit reference type is packed above
// the VerifyError kind -- confirm against the code that encodes/decodes the value.
const int kVerifyErrorRefTypeShift = 6;
// Selects the instructions for which register data is retained. Storing it for every
// instruction is unnecessary: it is only needed at branch points (for verification) or at GC
// points and branches (for verification + type-precise register analysis).
enum RegisterTrackingMode {
  kTrackRegsBranches = 0,  // Keep register data at branch points only.
  kTrackRegsGcPoints = 1,  // Keep register data at GC points and branches.
  kTrackRegsAll      = 2,  // Keep register data for all instructions.
};
// Maps dex pcs to the RegisterLine recorded for them.
//
// The destructor deletes every RegisterLine stored in the map, so the table is the owner of
// those objects. Because of that ownership the class is non-copyable: a compiler-generated copy
// would share the raw pointers and both destructors would delete them.
class PcToRegisterLineTable {
 public:
  PcToRegisterLineTable() {}

  // Deletes every RegisterLine held in the table.
  ~PcToRegisterLineTable() {
    STLDeleteValues(&pc_to_register_line_);
  }

  // Initialize the RegisterTable. Every instruction address can have a different set of information
  // about what's in which register, but for verification purposes we only need to store it at
  // branch target addresses (because we merge into that).
  void Init(RegisterTrackingMode mode, InsnFlags* flags, uint32_t insns_size,
            uint16_t registers_size, MethodVerifier* verifier);

  // Returns the RegisterLine stored for dex pc "idx", or NULL when nothing is stored for it.
  // The table keeps ownership of the returned object.
  RegisterLine* GetLine(size_t idx) {
    Table::iterator result = pc_to_register_line_.find(idx);  // TODO: C++0x auto
    if (result == pc_to_register_line_.end()) {
      return NULL;
    }
    return result->second;
  }

 private:
  typedef SafeMap<int32_t, RegisterLine*> Table;

  // Map from a dex pc to the register status associated with it
  Table pc_to_register_line_;

  // Copying is disallowed (declared, intentionally never defined) to prevent a double delete of
  // the owned RegisterLine objects.
  PcToRegisterLineTable(const PcToRegisterLineTable&);
  void operator=(const PcToRegisterLineTable&);
};
// The verifier.
//
// Both constructors are private; the public surface consists of the static VerifyClass entry
// points, a handful of accessors used while a verification is in progress, and static functions
// that manage the tables of GC maps (and, for the LLVM compiler, inferred register category maps)
// and rejected classes.
class MethodVerifier {
public:
/* Verify a class. Returns "true" on success. */
// NOTE(review): presumably "error" receives a description on failure -- confirm in the definition.
static bool VerifyClass(const Class* klass, std::string& error);
/*
* Structurally verify a class. Returns "true" on success. Used at compile time
* when the pointer for the method or declaring class can't be resolved.
*/
static bool VerifyClass(const DexFile* dex_file, DexCache* dex_cache,
const ClassLoader* class_loader, uint32_t class_def_idx, std::string& error);
// NOTE(review): undocumented here; see the definition for the meaning of the returned byte.
uint8_t EncodePcToReferenceMapData() const;
// Returns the version reported by the dex file being verified (dex_file_->GetVersion()).
uint32_t DexFileVersion() const {
return dex_file_->GetVersion();
}
// Returns a pointer to this verifier's register type cache (reg_types_).
RegTypeCache* GetRegTypeCache() {
return &reg_types_;
}
// Verification failed. Returns an output stream; presumably the caller appends the failure
// description to it (confirm against the definition).
std::ostream& Fail(VerifyError error);
// Log for verification information. Writes the "VFY: " prefix, the pretty-printed method_ and
// the current work_insn_idx_ (printed through a void* cast) to info_messages_, then returns that
// stream so the caller can append the actual message.
std::ostream& LogVerifyInfo() {
return info_messages_ << "VFY: " << PrettyMethod(method_)
<< '[' << reinterpret_cast<void*>(work_insn_idx_) << "] : ";
}
// Dump the state of the verifier, namely each instruction, what flags are set on it, register
// information
void Dump(std::ostream& os);
// Static GC map management.
// NOTE(review): presumably GetGcMap looks "ref" up in gc_maps_, and InitGcMaps/DeleteGcMaps
// create and destroy that table -- confirm against the definitions.
static const std::vector<uint8_t>* GetGcMap(Compiler::MethodReference ref);
static void InitGcMaps();
static void DeleteGcMaps();
#if defined(ART_USE_LLVM_COMPILER)
// Same trio for the inferred register category maps (LLVM compiler builds only).
static const compiler_llvm::InferredRegCategoryMap* GetInferredRegCategoryMap(Compiler::MethodReference ref);
static void InitInferredRegCategoryMaps();
static void DeleteInferredRegCategoryMaps();
#endif
// NOTE(review): presumably reports whether AddRejectedClass() was called for "ref" -- confirm.
static bool IsClassRejected(Compiler::ClassReference ref);
private:
// Construct a verifier for a resolved method.
explicit MethodVerifier(Method* method);
// Construct a verifier from the dex file pieces, for use when no resolved Method is available.
explicit MethodVerifier(const DexFile* dex_file, DexCache* dex_cache,
const ClassLoader* class_loader, uint32_t class_def_idx, const DexFile::CodeItem* code_item);
/*
* Perform verification on a single method.
*
* We do this in three passes:
* (1) Walk through all code units, determining instruction locations,
* widths, and other characteristics.
* (2) Walk through all code units, performing static checks on
* operands.
* (3) Iterate through the method, checking type safety and looking
* for code flow problems.
*
* Some checks may be bypassed depending on the verification mode. We can't
* turn this stuff off completely if we want to do "exact" GC.
*
* Confirmed here:
* - code array must not be empty
* Confirmed by ComputeWidthsAndCountOps():
* - opcode of first instruction begins at index 0
* - only documented instructions may appear
* - each instruction follows the last
* - last byte of last instruction is at (code_length-1)
*/
static bool VerifyMethod(Method* method);
// Variant reachable from the ReferenceMap2Visitor friend (see the friend declaration at the end
// of the class); judging by the name it verifies the method and dumps the verifier state.
static void VerifyMethodAndDump(Method* method);
/*
* Perform structural verification on a single method. Used at compile time
* when the pointer for the method or declaring class can't be resolved.
*
* We do this in two passes:
* (1) Walk through all code units, determining instruction locations,
* widths, and other characteristics.
* (2) Walk through all code units, performing static checks on
* operands.
*
* Code flow verification is skipped since a resolved method and class are
* necessary to perform all the checks.
*/
static bool VerifyMethod(uint32_t method_idx, const DexFile* dex_file, DexCache* dex_cache,
const ClassLoader* class_loader, uint32_t class_def_idx, const DexFile::CodeItem* code_item);
/* Run both structural and code flow verification on the method. */
bool VerifyAll();
/* Perform structural verification on a single method. */
bool VerifyStructure();
/*
* Compute the width of the instruction at each address in the instruction stream, and store it in
* insn_flags_. Addresses that are in the middle of an instruction, or that are part of switch
* table data, are not touched (so the caller should probably initialize insn_flags_ to zero).
*
* The new_instance_count_ and monitor_enter_count_ members are also set.
*
* Performs some static checks, notably:
* - opcode of first instruction begins at index 0
* - only documented instructions may appear
* - each instruction follows the last
* - last byte of last instruction is at (code_length-1)
*
* Logs an error and returns "false" on failure.
*/
bool ComputeWidthsAndCountOps();
/*
* Set the "in try" flags for all instructions protected by "try" statements. Also sets the
* "branch target" flags for exception handlers.
*
* Call this after widths have been set in insn_flags_.
*
* Returns "false" if something in the exception table looks fishy, but we're expecting the
* exception table to be somewhat sane.
*/
bool ScanTryCatchBlocks();
/*
* Perform static verification on all instructions in a method.
*
* Walks through instructions in a method calling VerifyInstruction on each.
*/
bool VerifyInstructions();
/*
* Perform static verification on an instruction.
*
* As a side effect, this sets the "branch target" flags in InsnFlags.
*
* "(CF)" items are handled during code-flow analysis.
*
* v3 4.10.1
* - target of each jump and branch instruction must be valid
* - targets of switch statements must be valid
* - operands referencing constant pool entries must be valid
* - (CF) operands of getfield, putfield, getstatic, putstatic must be valid
* - (CF) operands of method invocation instructions must be valid
* - (CF) only invoke-direct can call a method starting with '<'
* - (CF) <clinit> must never be called explicitly
* - operands of instanceof, checkcast, new (and variants) must be valid
* - new-array[-type] limited to 255 dimensions
* - can't use "new" on an array class
* - (?) limit dimensions in multi-array creation
* - local variable load/store register values must be in valid range
*
* v3 4.11.1.2
* - branches must be within the bounds of the code array
* - targets of all control-flow instructions are the start of an instruction
* - register accesses fall within range of allocated registers
* - (N/A) access to constant pool must be of appropriate type
* - code does not end in the middle of an instruction
* - execution cannot fall off the end of the code
* - (earlier) for each exception handler, the "try" area must begin and
* end at the start of an instruction (end can be at the end of the code)
* - (earlier) for each exception handler, the handler must start at a valid
* instruction
*/
bool VerifyInstruction(const Instruction* inst, uint32_t code_offset);
/* Ensure that the register index is valid for this code item. */
bool CheckRegisterIndex(uint32_t idx);
/* Ensure that the wide register index is valid for this code item. */
bool CheckWideRegisterIndex(uint32_t idx);
// Perform static checks on a field get or set instruction. All we do here is ensure that the
// field index is in the valid range.
bool CheckFieldIndex(uint32_t idx);
// Perform static checks on a method invocation instruction. All we do here is ensure that the
// method index is in the valid range.
bool CheckMethodIndex(uint32_t idx);
// Perform static checks on a "new-instance" instruction. Specifically, make sure the class
// reference isn't for an array class.
bool CheckNewInstance(uint32_t idx);
/* Ensure that the string index is in the valid range. */
bool CheckStringIndex(uint32_t idx);
// Perform static checks on an instruction that takes a class constant. Ensure that the class
// index is in the valid range.
bool CheckTypeIndex(uint32_t idx);
// Perform static checks on a "new-array" instruction. Specifically, make sure they aren't
// creating an array of arrays that causes the number of dimensions to exceed 255.
bool CheckNewArray(uint32_t idx);
// Verify an array data table. "cur_offset" is the offset of the fill-array-data instruction.
bool CheckArrayData(uint32_t cur_offset);
// Verify that the target of a branch instruction is valid. We don't expect code to jump directly
// into an exception handler, but it's valid to do so as long as the target isn't a
// "move-exception" instruction. We verify that in a later stage.
// The dex format forbids certain instructions from branching to themselves.
// Updates insn_flags_, setting the "branch target" flag.
bool CheckBranchTarget(uint32_t cur_offset);
// Verify a switch table. "cur_offset" is the offset of the switch instruction.
// Updates insn_flags_, setting the "branch target" flag.
bool CheckSwitchTargets(uint32_t cur_offset);
// Check the register indices used in a "vararg" instruction, such as invoke-virtual or
// filled-new-array.
// - vA holds word count (0-5), args[] have values.
// There are some tests we don't do here, e.g. we don't try to verify that invoking a method that
// takes a double is done with consecutive registers. This requires parsing the target method
// signature, which we will be doing later on during the code flow analysis.
bool CheckVarArgRegs(uint32_t vA, uint32_t arg[]);
// Check the register indices used in a "vararg/range" instruction, such as invoke-virtual/range
// or filled-new-array/range.
// - vA holds word count, vC holds index of first reg.
bool CheckVarArgRangeRegs(uint32_t vA, uint32_t vC);
// Extract the relative offset from a branch instruction.
// Returns "false" on failure (e.g. this isn't a branch instruction).
// The results are delivered through the pOffset, pConditional and selfOkay out-parameters.
bool GetBranchOffset(uint32_t cur_offset, int32_t* pOffset, bool* pConditional,
bool* selfOkay);
/* Perform detailed code-flow analysis on a single method. */
bool VerifyCodeFlow();
// Set the register types for the first instruction in the method based on the method signature.
// This has the side-effect of validating the signature.
bool SetTypesFromSignature();
/*
* Perform code flow on a method.
*
* The basic strategy is as outlined in v3 4.11.1.2: set the "changed" bit on the first
* instruction, process it (setting additional "changed" bits), and repeat until there are no
* more.
*
* v3 4.11.1.1
* - (N/A) operand stack is always the same size
* - operand stack [registers] contain the correct types of values
* - local variables [registers] contain the correct types of values
* - methods are invoked with the appropriate arguments
* - fields are assigned using values of appropriate types
* - opcodes have the correct type values in operand registers
* - there is never an uninitialized class instance in a local variable in code protected by an
* exception handler (operand stack is okay, because the operand stack is discarded when an
* exception is thrown) [can't know what's a local var w/o the debug info -- should fall out of
* register typing]
*
* v3 4.11.1.2
* - execution cannot fall off the end of the code
*
* (We also do many of the items described in the "static checks" sections, because it's easier to
* do them here.)
*
* We need an array of RegType values, one per register, for every instruction. If the method uses
* monitor-enter, we need extra data for every register, and a stack for every "interesting"
* instruction. In theory this could become quite large -- up to several megabytes for a monster
* function.
*
* NOTE:
* The spec forbids backward branches when there's an uninitialized reference in a register. The
* idea is to prevent something like this:
* loop:
* move r1, r0
* new-instance r0, MyClass
* ...
* if-eq rN, loop // once
* initialize r0
*
* This leaves us with two different instances, both allocated by the same instruction, but only
* one is initialized. The scheme outlined in v3 4.11.1.4 wouldn't catch this, so they work around
* it by preventing backward branches. We achieve identical results without restricting code
* reordering by specifying that you can't execute the new-instance instruction if a register
* contains an uninitialized instance created by that same instruction.
*/
bool CodeFlowVerifyMethod();
/*
* Perform verification for a single instruction.
*
* This requires fully decoding the instruction to determine the effect it has on registers.
*
* Finds zero or more following instructions and sets the "changed" flag if execution at that
* point needs to be (re-)evaluated. Register changes are merged into "reg_types_" at the target
* addresses. Does not set or clear any other flags in "insn_flags_".
*/
bool CodeFlowVerifyInstruction(uint32_t* start_guess);
// Perform verification of a new array instruction
void VerifyNewArray(const DecodedInstruction& dec_insn, bool is_filled,
bool is_range);
// Perform verification of an aget instruction. The destination register's type will be set to
// be that of component type of the array unless the array type is unknown, in which case a
// bottom type inferred from the type of instruction is used. is_primitive is false for an
// aget-object.
void VerifyAGet(const DecodedInstruction& insn, const RegType& insn_type,
bool is_primitive);
// Perform verification of an aput instruction.
void VerifyAPut(const DecodedInstruction& insn, const RegType& insn_type,
bool is_primitive);
// Lookup instance field and fail for resolution violations
Field* GetInstanceField(const RegType& obj_type, int field_idx);
// Lookup static field and fail for resolution violations
Field* GetStaticField(int field_idx);
// Perform verification of an iget or sget instruction.
void VerifyISGet(const DecodedInstruction& insn, const RegType& insn_type,
bool is_primitive, bool is_static);
// Perform verification of an iput or sput instruction.
void VerifyISPut(const DecodedInstruction& insn, const RegType& insn_type,
bool is_primitive, bool is_static);
// Resolves a class based on an index and performs access checks to ensure the referrer can
// access the resolved class.
const RegType& ResolveClassAndCheckAccess(uint32_t class_idx);
/*
* For the "move-exception" instruction at "work_insn_idx_", which must be at an exception handler
* address, determine the Join of all exceptions that can land here. Fails if no matching
* exception handler can be found or if the Join of exception types fails.
*/
const RegType& GetCaughtExceptionType();
/*
* Resolves a method based on an index and performs access checks to ensure
* the referrer can access the resolved method.
* Does not throw exceptions.
*/
Method* ResolveMethodAndCheckAccess(uint32_t method_idx, MethodType method_type);
/*
* Verify the arguments to a method. We're executing in "method", making
* a call to the method reference in vB.
*
* If this is a "direct" invoke, we allow calls to <init>. For calls to
* <init>, the first argument may be an uninitialized reference. Otherwise,
* calls to anything starting with '<' will be rejected, as will any
* uninitialized reference arguments.
*
* For non-static method calls, this will verify that the method call is
* appropriate for the "this" argument.
*
* The method reference is in vBBBB. The "is_range" parameter determines
* whether we use 0-4 "args" values or a range of registers defined by
* vAA and vCCCC.
*
* Widening conversions on integers and references are allowed, but
* narrowing conversions are not.
*
* Returns the resolved method on success, NULL on failure.
* NOTE(review): this comment used to say "with *failure set appropriately", but the signature
* has no such out-parameter; presumably the failure is recorded in the failure_ member -- confirm.
*/
Method* VerifyInvocationArgs(const DecodedInstruction& dec_insn,
MethodType method_type, bool is_range, bool is_super);
/*
* Return the register type for the method. We can't just use the already-computed
* DalvikJniReturnType, because if it's a reference type we need to do the class lookup.
* Returned references are assumed to be initialized. Returns kRegTypeUnknown for "void".
*/
const RegType& GetMethodReturnType();
/*
* Verify that the target instruction is not "move-exception". It's important that the only way
* to execute a move-exception is as the first instruction of an exception handler.
* Returns "true" if all is well, "false" if the target instruction is move-exception.
*/
bool CheckNotMoveException(const uint16_t* insns, int insn_idx);
/*
* Replace an instruction with "throw-verification-error". This allows us to
* defer error reporting until the code path is first used.
*/
void ReplaceFailingInstruction();
/*
* Control can transfer to "next_insn". Merge the registers from merge_line into the table at
* next_insn, and set the changed flag on the target address if any of the registers were changed.
* Returns "false" if an error is encountered.
*/
bool UpdateRegisters(uint32_t next_insn, const RegisterLine* merge_line);
#if defined(ART_USE_LLVM_COMPILER)
/*
* Generate the inferred register category for LLVM-based code generator.
* Returns a pointer to the generated InferredRegCategoryMap, or NULL on failure.
*/
const compiler_llvm::InferredRegCategoryMap* GenerateInferredRegCategoryMap();
#endif
/*
* Generate the GC map for a method that has just been verified (i.e. we're doing this as part of
* verification). For type-precise determination we have all the data we need, so we just need to
* encode it in some clever fashion.
* Returns a pointer to a newly-allocated encoded map (a byte vector), or NULL on failure.
*/
const std::vector<uint8_t>* GenerateGcMap();
// Verify that the GC map associated with method_ is well formed
void VerifyGcMap(const std::vector<uint8_t>& data);
// Compute sizes for GC map data. All three results are returned through the out-parameters.
void ComputeGcMapSizes(size_t* gc_points, size_t* ref_bitmap_bits, size_t* log2_max_gc_pc);
// NOTE(review): presumably the InsnFlags entry for work_insn_idx_ -- confirm in the definition.
InsnFlags* CurrentInsnFlags();
// All the GC maps that the verifier has created
typedef SafeMap<const Compiler::MethodReference, const std::vector<uint8_t>*> GcMapTable;
// NOTE(review): presumably guards gc_maps_ -- confirm against the definitions.
static Mutex* gc_maps_lock_;
static GcMapTable* gc_maps_;
static void SetGcMap(Compiler::MethodReference ref, const std::vector<uint8_t>& gc_map);
#if defined(ART_USE_LLVM_COMPILER)
// All the inferred register category maps that the verifier has created
typedef SafeMap<const Compiler::MethodReference,
const compiler_llvm::InferredRegCategoryMap*> InferredRegCategoryMapTable;
// NOTE(review): presumably guards inferred_reg_category_maps_ -- confirm.
static Mutex* inferred_reg_category_maps_lock_;
static InferredRegCategoryMapTable* inferred_reg_category_maps_;
static void SetInferredRegCategoryMap(Compiler::MethodReference ref,
const compiler_llvm::InferredRegCategoryMap& m);
#endif
// NOTE(review): presumably records "ref" so that IsClassRejected(ref) reports it -- confirm.
static void AddRejectedClass(Compiler::ClassReference ref);
// Register type cache; exposed to callers through GetRegTypeCache().
RegTypeCache reg_types_;
// Register lines stored per dex pc (see PcToRegisterLineTable).
PcToRegisterLineTable reg_table_;
// Storage for the register status we're currently working on.
UniquePtr<RegisterLine> work_line_;
// The address of the instruction we're currently working on, note that this is in 2 byte
// quantities
uint32_t work_insn_idx_;
// Storage for the register status we're saving for later.
UniquePtr<RegisterLine> saved_line_;
Method* method_; // The method we're working on.
const DexFile* dex_file_; // The dex file containing the method.
DexCache* dex_cache_; // The dex_cache for the declaring class of the method.
const ClassLoader* class_loader_; // The class loader for the declaring class of the method.
uint32_t class_def_idx_; // The class def index of the declaring class of the method.
const DexFile::CodeItem* code_item_; // The code item containing the code for the method.
UniquePtr<InsnFlags[]> insn_flags_; // Instruction widths and flags, one entry per code unit.
// The type of any error that occurs
VerifyError failure_;
// Failure message log
std::ostringstream fail_messages_;
// Info message log (written by LogVerifyInfo())
std::ostringstream info_messages_;
// The number of occurrences of specific opcodes.
size_t new_instance_count_;
size_t monitor_enter_count_;
friend struct art::ReferenceMap2Visitor; // for VerifyMethodAndDump
};
} // namespace verifier
} // namespace art
#endif // ART_SRC_VERIFIER_METHOD_VERIFIER_H_