YARA:第十六章-libyara之C API手册(威胁检测)

发布于:2024-07-27 ⋅ 阅读:(71) ⋅ 点赞:(0)

        YARA是一个流行的开源项目,用于恶意软件检测和分析。它允许用户定义规则,这些规则可以识别和分类恶意软件样本。YARA 的强大之处在于其灵活性和易用性,尤其是在 C/C++ 项目中。本文将详细介绍如何通过 YARA 的 C API 集成和使用 YARA,从而将 YARA 无缝集成到您的安全解决方案中。

        本章主要介绍libyara的C API接口,供开发时参考。其它关于libyara的详细介绍可以浏览以下章节:


1. 数据结构











2. 函数接口

2.1 yr_initialize

2.2 yr_finalize

2.3 yr_compiler_create

2.4 yr_compiler_destroy

2.5 yr_compiler_set_callback

2.6 yr_compiler_set_include_callback

2.7 yr_compiler_add_file

2.8 yr_compiler_add_fd

2.9 yr_compiler_add_string

2.10 yr_compiler_get_rules

2.11 yr_compiler_define_integer_variable

2.12 yr_compiler_define_float_variable

2.13 yr_compiler_define_boolean_variable

2.14 yr_compiler_define_string_variable

2.15 yr_rules_define_integer_variable

2.16 yr_rules_define_boolean_variable

2.17 yr_rules_define_float_variable

2.18 yr_rules_define_string_variable

2.19 yr_rules_destroy

2.20 yr_rules_save

2.21 yr_rules_save_stream

2.22 yr_rules_load

2.23 yr_rules_load_stream

2.24 yr_rules_scan_mem

2.25 yr_rules_scan_file

2.26 yr_rules_scan_fd

2.27 yr_rule_disable

2.28 yr_rule_enable

2.29 yr_scanner_create

2.30 yr_scanner_destroy

2.31 yr_scanner_set_callback

2.32 yr_scanner_set_timeout

2.33 yr_scanner_set_flags

2.34 yr_scanner_define_integer_variable

2.35 yr_scanner_define_boolean_variable

2.36 yr_scanner_define_float_variable

2.37 yr_scanner_define_string_variable

2.38 yr_scanner_scan_mem_blocks

2.39 yr_scanner_scan_mem

2.40 yr_scanner_scan_file

2.41 yr_scanner_scan_fd

2.42 yr_scanner_last_error_rule

2.43 yr_scanner_last_error_string

3. 错误码













1. 数据结构



typedef struct _YR_COMPILER
  // Arena that contains the data generated by the compiler. The arena has
  // the following buffers:
  //
  //   YR_SUMMARY_SECTION:
  //      A YR_SUMMARY struct.
  //   YR_RULES_TABLE:
  //      An array of YR_RULE structures, one per each rule.
  //   YR_STRINGS_TABLE:
  //      An array of YR_STRING structures, one per each string.
  //   YR_METAS_TABLE:
  //      An array of YR_META structures, one per each meta definition.
  //   YR_NAMESPACES_TABLE:
  //      An array of YR_NAMESPACE structures, one per each namespace.
  //   YR_EXTERNAL_VARIABLES_TABLE:
  //      An array of YR_EXTERNAL_VARIABLE structures, one per each external
  //      variable defined.
  //   YR_SZ_POOL:
  //      A collection of null-terminated strings. This buffer contains
  //      identifiers, literal strings, and in general any null-terminated
  //      string referenced by other data structures.
  //   YR_CODE_SECTION:
  //      The code for the condition section of all the rules. This is the
  //      code executed by yr_execute_code.
  //   YR_RE_CODE_SECTION:
  //      Similar to YR_CODE_SECTION, but it contains the code for regular
  //      expressions. This is the code executed by yr_re_exec and
  //      yr_re_fast_exec.
  //   YR_AC_TRANSITION_TABLE:
  //      An array of uint32_t containing the Aho-Corasick transition table.
  //      See comment in _yr_ac_build_transition_table for details.
  //   YR_AC_STATE_MATCHES_TABLE:
  //      An array of uint32_t with the same number of items than the transition
  //      table. If entry N in the transition table corresponds to some
  //      Aho-Corasick state, the N-th item in this array has the index within
  //      the matches pool where the list of matches for that state begins.
  //   YR_AC_STATE_MATCHES_POOL:
  //      An array of YR_AC_MATCH structures.
  //
  YR_ARENA* arena;

  // Index of the rule being compiled in the array of YR_RULE structures
  // stored in YR_RULES_TABLE. If this is MAX_UINT32 the compiler is not
  // parsing a rule.
  uint32_t current_rule_idx;

  // Index of the rule that comes next during parsing.
  uint32_t next_rule_idx;

  // Index of the string being compiled in the array of YR_STRING structures
  // stored in YR_STRINGS_TABLE.
  uint32_t current_string_idx;

  // Index of the current namespace in the array of YR_NAMESPACE structures
  // stored in YR_NAMESPACES_TABLE.
  uint32_t current_namespace_idx;

  // Index of the current meta in the array of YR_META structures stored in
  // YR_METAS_TABLE.
  uint32_t current_meta_idx;

  // Pointer to a YR_RULES structure that represents the compiled rules. This
  // is what yr_compiler_get_rules returns. Once these rules are generated you
  // can't call any of the yr_compiler_add_xxx functions.
  YR_RULES* rules;

  int errors;
  int current_line;
  int last_error;
  int last_error_line;
  bool strict_escape;

  jmp_buf error_recovery;

  YR_AC_AUTOMATON* automaton;
  YR_HASH_TABLE* rules_table;
  YR_HASH_TABLE* objects_table;
  YR_HASH_TABLE* strings_table;

  // Hash table that contains all the identifiers with wildcards used in
  // conditions. This is used to make sure we error out if we are parsing a
  // rule _AFTER_ an existing rule has referenced it in a condition. For
  // example:
  // rule a1 { condition: true }
  // rule b { condition: 1 of (a*) }
  // rule a2 { condition: true }
  // This must be a compiler error when parsing a2 because b has already been
  // parsed and the instructions to check _ONLY_ a1 have been emitted. Rule b
  // has no concept of a2 and would not work as expected.
  YR_HASH_TABLE* wildcard_identifiers_table;

  // Hash table that contains all the strings that have been written to the
  // YR_SZ_POOL buffer in the compiler's arena. Values in the hash table are
  // the offset within the YR_SZ_POOL where the string resides. This allows to
  // know if some string has already been written in order to reuse it instead
  // of writing it again.
  YR_HASH_TABLE* sz_table;

  YR_FIXUP* fixup_stack_head;

  int num_namespaces;

  int loop_index;
  int loop_for_of_var_index;

  char* file_name_stack[YR_MAX_INCLUDE_DEPTH];
  int file_name_stack_ptr;

  char last_error_extra_info[YR_MAX_COMPILER_ERROR_EXTRA_INFO];

  // This buffer is used by the lexer for accumulating text strings. Those
  // strings are copied from flex's internal variables. lex_buf_ptr points to
  // the end of the string and lex_buf_len contains the number of bytes that
  // have been copied into lex_buf.
  char lex_buf[YR_LEX_BUF_SIZE];
  char* lex_buf_ptr;
  unsigned short lex_buf_len;

  char include_base_dir[MAX_PATH];
  void* user_data;
  void* incl_clbk_user_data;
  void* re_ast_clbk_user_data;

  YR_ATOMS_CONFIG atoms_config;




  // File size of the file being scanned.
  uint64_t file_size;

  // Entry point of the file being scanned, if the file is PE or ELF.
  uint64_t entry_point;

  // Scanning flags.
  int flags;

  // Canary value used for preventing hand-crafted objects from being embedded
  // in compiled rules and used to exploit YARA. The canary value is initialized
  // to a random value and is subsequently set to all objects created by
  // yr_object_create. The canary is verified when objects are used by
  // yr_execute_code.
  int canary;

  // Scan timeout in nanoseconds.
  uint64_t timeout;

  // Pointer to user-provided data passed to the callback function.
  void* user_data;

  // Pointer to the user-provided callback function that is called when an
  // event occurs during the scan (a rule matching, a module being loaded, etc)
  YR_CALLBACK_FUNC callback;

  // Pointer to the YR_RULES object associated to this scan context.
  YR_RULES* rules;

  // Pointer to the YR_STRING causing the most recent scan error.
  YR_STRING* last_error_string;

  // Pointer to the iterator used for scanning

  // Pointer to a table mapping identifiers to YR_OBJECT structures. This table
  // contains entries for external variables and modules.
  YR_HASH_TABLE* objects_table;

  // Notebook used for storing YR_MATCH structures associated to the matches
  // found.
  YR_NOTEBOOK* matches_notebook;

  // Stopwatch used for measuring the time elapsed during the scan.
  YR_STOPWATCH stopwatch;

  // Fiber pool used by yr_re_exec.
  RE_FIBER_POOL re_fiber_pool;

  // Pool used by yr_re_fast_exec.
  RE_FAST_EXEC_POSITION_POOL re_fast_exec_position_pool;

  // A bitmap with one bit per rule, bit N is set when the rule with index N
  // has matched.
  YR_BITMASK* rule_matches_flags;

  // A bitmap with one bit per namespace, bit N is set if the namespace with
  // index N has some global rule that is not satisfied.
  YR_BITMASK* ns_unsatisfied_flags;

  // A bitmap with one bit per string, bit N is set if the string with index
  // N has too many matches.
  YR_BITMASK* strings_temp_disabled;

  // Array with pointers to lists of matches. Item N in the array has the
  // list of matches for string with index N.
  YR_MATCHES* matches;

  // "unconfirmed_matches" is like "matches" but for strings that are part of
  // a chain. Let's suppose that the string S is split in two chained strings
  // S1 <- S2. When a match is found for S1, we can't be sure that S matches
  // until a match for S2 is found (within the range defined by chain_gap_min
  // and chain_gap_max), so the matches for S1 are put in "unconfirmed_matches"
  // until they can be confirmed or discarded.
  YR_MATCHES* unconfirmed_matches;

  // A bitmap with one bit per rule, bit N is set if the corresponding rule
  // must be evaluated.
  YR_BITMASK* required_eval;

  // profiling_info is a pointer to an array of YR_PROFILING_INFO structures,
  // one per rule. Entry N has the profiling information for rule with index N.
  YR_PROFILING_INFO* profiling_info;



struct YR_MATCH
  int64_t base;          // Base address for the match
  int64_t offset;        // Offset relative to base for the match
  int32_t match_length;  // Match length
  int32_t data_length;

  // Pointer to a buffer containing a portion of the matched data. The size of
  // the buffer is data_length. data_length is always <= length and is limited
  // to YR_CONFIG_MAX_MATCH_DATA bytes.
  const uint8_t* data;

  YR_MATCH* prev;
  YR_MATCH* next;

  // If the match belongs to a chained string chain_length contains the
  // length of the chain. This field is used only in unconfirmed matches.
  int32_t chain_length;

  // True if this is match for a private string.
  bool is_private;

  // Set to the xor key if this is an xor string.
  uint8_t xor_key;



struct YR_META
  DECLARE_REFERENCE(const char*, identifier);
  DECLARE_REFERENCE(const char*, string);

  int64_t integer;
  int32_t type;
  int32_t flags;



  const char* module_name;
  void* module_data;
  size_t module_data_size;



struct YR_RULE
  int32_t flags;

  // Number of atoms generated for this rule.
  int32_t num_atoms;

  // Number of strings that must match for this rule to have some possibility
  // to match.
  uint32_t required_strings;

  // Just for padding.
  uint32_t unused;

  DECLARE_REFERENCE(const char*, identifier);
  DECLARE_REFERENCE(const char*, tags);



struct YR_RULES
  YR_ARENA* arena;

  // Array of pointers with an entry for each rule. The rule_idx field in the
  // YR_STRING structure is an index within this array.
    YR_RULE* rules_table;
    // The previous name for rules_table was rules_list_head, because this
    // was previously a linked list. The old name is maintained but marked as
    // deprecated, which will raise a warning if used.
    // TODO(vmalvarez): Remove this field when a reasonable a few versions
    // after 4.1 has been released.
    YR_DEPRECATED(YR_RULE* rules_list_head);

  // Array of pointers with an entry for each of the defined strings. The idx
  // field in the YR_STRING structure is an index within this array.
    YR_STRING* strings_table;
    // The previous name for strings_table was strings_list_head, because this
    // was previously a linked list. The old name is maintained but marked as
    // deprecated, which will raise a warning if used.
    // TODO(vmalvarez): Remove this field when a reasonable a few versions
    // after 4.1 has been released.
    YR_DEPRECATED(YR_STRING* strings_list_head);

  // Array of pointers with an entry for each external variable.
    YR_EXTERNAL_VARIABLE* ext_vars_table;
    // The previous name for ext_vars_table was externals_list_head, because
    // this was previously a linked list. The old name is maintained but marked
    // as deprecated, which will raise a warning if used.
    // TODO(vmalvarez): Remove this field when a reasonable a few versions
    // after 4.1 has been released.
    YR_DEPRECATED(YR_EXTERNAL_VARIABLE* externals_list_head);

  // Pointer to the Aho-Corasick transition table.
  YR_AC_TRANSITION* ac_transition_table;

  // A pointer to the arena where YR_AC_MATCH structures are allocated.
  YR_AC_MATCH* ac_match_pool;

  // Table that translates from Aho-Corasick states (which are identified by
  // numbers 0, 1, 2.. and so on) to the index in ac_match_pool where the
  // YR_AC_MATCH structures for the corresponding state start.
  // If the entry corresponding to state N in ac_match_table is zero, it
  // means that there's no match associated to the state. If it's non-zero,
  // its value is the 1-based index within ac_match_pool where the first
  // match resides.
  uint32_t* ac_match_table;

  // Pointer to the first instruction that is executed when evaluating the
  // conditions for all rules. The code is executed by yr_execute_code and
  // the instructions are defined by the OP_X macros in exec.h.
  const uint8_t* code_start;

  // A bitmap with one bit per rule, bit N is set when the condition for rule
  // might evaluate to true even without any string matches.
  YR_BITMASK* no_required_strings;

  // Total number of rules.
  uint32_t num_rules;

  // Total number of strings.
  uint32_t num_strings;

  // Total number of namespaces.
  uint32_t num_namespaces;


        作为参数用于 yr_rules_save_stream() 和 yr_rules_load_stream() 函数中,这两个函数可以以用户自定义流的形式存储和读取YARA已编译的规则信息。

typedef struct _YR_STREAM
  void* user_data;





struct YR_STRING
  // Flags, see STRING_FLAGS_XXX macros defined above.
  uint32_t flags;

  // Index of this string in the array of YR_STRING structures stored in
  // YR_STRINGS_TABLE.
  uint32_t idx;

  // If the string can only match at a specific offset (for example if the
  // condition is "$a at 0" the string $a can only match at offset 0), the
  // fixed_offset field contains the offset, it have the YR_UNDEFINED value for
  // strings that can match anywhere.
  int64_t fixed_offset;

  // Index of the rule containing this string in the array of YR_RULE
  // structures stored in YR_RULES_TABLE.
  uint32_t rule_idx;

  // String's length.
  int32_t length;

  // Pointer to the string itself, the length is indicated by the "length"
  // field.
  DECLARE_REFERENCE(uint8_t*, string);

  // Strings are splitted in two or more parts when they contain a "gap" that
  // is larger than YR_STRING_CHAINING_THRESHOLD. This happens in strings like
  // { 01 02 03 04 [X-Y] 05 06 07 08 } if Y >= X + YR_STRING_CHAINING_THRESHOLD
  // and also in { 01 02 03 04 [-] 05 06 07 08 }. In both cases the strings are
  // split in { 01 02 03 04 } and { 05 06 07 08 }, and the two smaller strings
  // are searched for independently. If some string S is splitted in S1 and S2,
  // S2 is chained to S1. In the example above { 05 06 07 08 } is chained to
  // { 01 02 03 04 }. The same applies when the string is splitted in more than
  // two parts, if S is split in S1, S2, and S3. S3 is chained to S2 and S2 is
  // chained to S1 (it can represented as: S1 <- S2 <- S3).

  // When this string is chained to some other string, chain_gap_min and
  // chain_gap_max contain the minimum and maximum distance between the two
  // strings. For example in { 01 02 03 04 [X-Y] 05 06 07 08 }, the string
  // { 05 06 07 08 } is chained to { 01 02 03 04 } and chain_gap_min is X
  // and chain_gap_max is Y. These fields are ignored for strings that are not
  // part of a string chain.
  int32_t chain_gap_min;
  int32_t chain_gap_max;

  // Identifier of this string.
  DECLARE_REFERENCE(const char*, identifier);



  // Pointer to namespace's name.
  DECLARE_REFERENCE(const char*, name);

  // Index of this namespace in the array of YR_NAMESPACE structures stored
  // in YR_NAMESPACES_TABLE.
  //
  // YR_ALIGN(8) forces the idx field to be treated as a 8-bytes field
  // and therefore the struct's size is 16 bytes. This is necessary only for
  // 32-bits versions of YARA compiled with Visual Studio. See: #1358.
  YR_ALIGN(8) uint32_t idx;

2. 函数接口

2.1 yr_initialize



int yr_initialize(void)

2.2 yr_finalize



int yr_finalize(void)

2.3 yr_compiler_create



int yr_compiler_create(YR_COMPILER **compiler)

2.4 yr_compiler_destroy


void yr_compiler_destroy(YR_COMPILER *compiler)

2.5 yr_compiler_set_callback


void yr_compiler_set_callback(YR_COMPILER *compiler, 
    YR_COMPILER_CALLBACK_FUNC callback, void *user_data)

2.6 yr_compiler_set_include_callback


void yr_compiler_set_include_callback(YR_COMPILER *compiler, 
    YR_COMPILER_INCLUDE_CALLBACK_FUNC callback, 
    YR_COMPILER_INCLUDE_FREE_FUNC include_free, 
    void *user_data)

2.7 yr_compiler_add_file

int yr_compiler_add_file(YR_COMPILER *compiler, 
        FILE *file, const char *namespace, const char *file_name)

2.8 yr_compiler_add_fd


int yr_compiler_add_fd(YR_COMPILER *compiler, 
        YR_FILE_DESCRIPTOR rules_fd, const char *namespace, const char *file_name)

2.9 yr_compiler_add_string


int yr_compiler_add_string(YR_COMPILER *compiler, 
        const char *string, const char *namespace_)

2.10 yr_compiler_get_rules


int yr_compiler_get_rules(YR_COMPILER *compiler, YR_RULES **rules)

2.11 yr_compiler_define_integer_variable


int yr_compiler_define_integer_variable(YR_COMPILER *compiler, 
        const char *identifier, int64_t value)

2.12 yr_compiler_define_float_variable


int yr_compiler_define_float_variable(YR_COMPILER *compiler, 
        const char *identifier, double value)

2.13 yr_compiler_define_boolean_variable


int yr_compiler_define_boolean_variable(YR_COMPILER *compiler, 
        const char *identifier, int value)

2.14 yr_compiler_define_string_variable


int yr_compiler_define_string_variable(YR_COMPILER *compiler, 
    const char *identifier, const char *value)

2.15 yr_rules_define_integer_variable


int yr_rules_define_integer_variable(YR_RULES *rules, 
        const char *identifier, int64_t value)

2.16 yr_rules_define_boolean_variable


int yr_rules_define_boolean_variable(YR_RULES *rules, 
        const char *identifier, int value)

2.17 yr_rules_define_float_variable


int yr_rules_define_float_variable(YR_RULES *rules, 
        const char *identifier, double value)

2.18 yr_rules_define_string_variable


int yr_rules_define_string_variable(YR_RULES *rules, 
        const char *identifier, const char *value)

2.19 yr_rules_destroy


void yr_rules_destroy(YR_RULES *rules)

2.20 yr_rules_save


int yr_rules_save(YR_RULES *rules, const char *filename)

2.21 yr_rules_save_stream


int yr_rules_save_stream(YR_RULES *rules, YR_STREAM *stream)

2.22 yr_rules_load


int yr_rules_load(const char *filename, YR_RULES **rules)

2.23 yr_rules_load_stream


int yr_rules_load_stream(YR_STREAM *stream, YR_RULES **rules)

2.24 yr_rules_scan_mem


int yr_rules_scan_mem(YR_RULES *rules, const uint8_t *buffer, 
        size_t buffer_size, int flags, YR_CALLBACK_FUNC callback, 
        void *user_data, int timeout)

2.25 yr_rules_scan_file


int yr_rules_scan_file(YR_RULES *rules, const char *filename, 
        int flags, YR_CALLBACK_FUNC callback, 
        void *user_data, int timeout)

2.26 yr_rules_scan_fd


int yr_rules_scan_fd(YR_RULES *rules, YR_FILE_DESCRIPTOR fd, 
        int flags, YR_CALLBACK_FUNC callback, void *user_data, int timeout)

2.27 yr_rule_disable


void yr_rule_disable(YR_RULE *rule)

2.28 yr_rule_enable


void yr_rule_enable(YR_RULE *rule)

2.29 yr_scanner_create


int yr_scanner_create(YR_RULES *rules, YR_SCANNER **scanner)

2.30 yr_scanner_destroy


void yr_scanner_destroy(YR_SCANNER *scanner)

2.31 yr_scanner_set_callback


void yr_scanner_set_callback(YR_SCANNER *scanner, 
        YR_CALLBACK_FUNC callback, void *user_data)

2.32 yr_scanner_set_timeout


void yr_scanner_set_timeout(YR_SCANNER *scanner, int timeout)

2.33 yr_scanner_set_flags







void yr_scanner_set_flags(YR_SCANNER *scanner, int flags)

2.34 yr_scanner_define_integer_variable


int yr_scanner_define_integer_variable(YR_SCANNER *scanner, const char *identifier, int64_t value)

2.35 yr_scanner_define_boolean_variable


int yr_scanner_define_boolean_variable(YR_SCANNER *scanner, const char *identifier, int value)

2.36 yr_scanner_define_float_variable


int yr_scanner_define_float_variable(YR_SCANNER *scanner, const char *identifier, double value)

2.37 yr_scanner_define_string_variable


int yr_scanner_define_string_variable(YR_SCANNER *scanner, const char *identifier, const char *value)

2.38 yr_scanner_scan_mem_blocks


int yr_scanner_scan_mem_blocks(YR_SCANNER *scanner, YR_MEMORY_BLOCK_ITERATOR *iterator)

2.39 yr_scanner_scan_mem


int yr_scanner_scan_mem(YR_SCANNER *scanner, const uint8_t *buffer, size_t buffer_size)

2.40 yr_scanner_scan_file


int yr_scanner_scan_file(YR_SCANNER *scanner, const char *filename)

2.41 yr_scanner_scan_fd


int yr_scanner_scan_fd(YR_SCANNER *scanner, YR_FILE_DESCRIPTOR fd)

2.42 yr_scanner_last_error_rule


YR_RULE *yr_scanner_last_error_rule(YR_SCANNER *scanner)

2.43 yr_scanner_last_error_string


YR_STRING *yr_scanner_last_error_string(YR_SCANNER *scanner)

3. 错误码























