Predator  [unstable] git snapshot
code_listener.h
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2009 Kamil Dudka <kdudka@redhat.com>
3  *
4  * This file is part of predator.
5  *
6  * predator is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * any later version.
10  *
11  * predator is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with predator. If not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #ifndef H_GUARD_CODE_LISTENER_H
21 #define H_GUARD_CODE_LISTENER_H
22 
23 #include <stdbool.h>
24 
25 #ifdef __cplusplus
26 extern "C" {
27 #endif
28 
29 /**
30  * @file code_listener.h
31  * @b code @b listener interface - useful to write compiler plug-ins, etc.
32  */
33 
34 /**
35  * generic location info: a position (file, line, column) in an input file.
36  * @note taken from gcc's expanded_location defined in <gcc/input.h>
37  */
38 struct cl_loc {
39  /**
40  * input file as a zero-terminated string. If NULL, the location info is
41  * either invalid or not available.
42  */
43  const char *file;
44 
45  /**
46  * line number in the input file (starting at 1).
47  */
48  int line;
49 
50  /**
51  * column number in the input file (starting at 1).
52  */
53  int column;
54 
55  /**
56  * true if the token is located in a system header.
57  * @note not used for now
58  */
59  bool sysp;
60 };
61 
62 /**
63  * nullified location info, which means "unknown location"
    * @note this is only a declaration; the object itself is defined elsewhere
64  */
65 extern const struct cl_loc cl_loc_unknown;
66 
67 /**
68  * return loc if it is a valid location info; return fallback otherwise
    * @param loc preferred location info
    * @param fallback location info returned when loc is not valid
    * @return either loc or fallback
    * @note NOTE(review): "valid" presumably means cl_loc::file is not NULL
    * (see the documentation of cl_loc::file) - confirm against the
    * implementation, which is not part of this header
69  */
70 const struct cl_loc* cl_loc_fallback(
71  const struct cl_loc *loc,
72  const struct cl_loc *fallback);
73 
74 /**
75  * type of function writing a message; such a function returns no value
76  * @param msg zero-terminated string to write
77  */
78 typedef void (*cl_print_fnc_t)(const char *msg);
79 
80 /**
81  * collection of call-backs for writing various types of messages
82  */
83 struct cl_init_data {
84  /**
85  * function to write debug messages (perhaps empty function when not
86  * debugging?)
87  */
89 
90  /**
91  * function to write a warning message (may or may not be related to
92  * location currently being processed)
93  */
95 
96  /**
97  * function to write an error message (may or may not be related to
98  * location currently being processed)
99  */
101 
102  /**
103  * function to write an information message (which usually gives some extra
104  * info related to previous warning/error message)
105  */
107 
108  /**
109  * function to display critical error and then terminate the application
110  * @attention This function should never return (TODO: attribute?).
111  */
113 
114  /**
115  * debugging level: the greater the number, the more output; 0 means no debugging
116  */
118 };
119 
120 /**
121  * global initialization - it sets message printing call-backs
122  * @param init_data - collection of call-backs to set (none of them can be NULL)
123  * @note You should call cl_global_cleanup() to free resources before exit.
     * @see cl_global_init_defaults() for the variant that sets built-in
     * functions to print messages instead of caller-supplied call-backs
124  */
125 void cl_global_init(struct cl_init_data *init_data);
126 
127 /**
128  * global initialization - it sets built-in functions to print messages
129  * @param app_name - name of the application which appears in all messages. If
130  * NULL is given, no application name will be printed.
131  * @param debug_level - debugging level: the greater the number, the more
132  * output; 0 means no debugging
133  * @note You should call cl_global_cleanup() to free resources before exit.
134  */
136  const char *app_name,
137  int debug_level);
138 
139 /**
140  * free resources allocated by cl_global_init() or cl_global_init_defaults()
     * @note the notes on both initialization functions ask for this function
     * to be called before exit
141  */
142 void cl_global_cleanup(void);
143 
144 /**
145  * symbol scope enumeration (linearly ordered)
146  */
148  /**
149  * scope is unlimited
150  */
152 
153  /**
154  * scope is limited to current source file
155  */
157 
158  /**
159  * scope is limited to currently processed function
160  */
162 };
163 
164 /**
165  * basic type enumeration
166  */
167 enum cl_type_e {
176  CL_TYPE_CHAR, /**< not used by the gcc plug-in at the moment */
180 
181  /**
182  * used only by cl_cst
183  */
185 };
186 
187 /**
188  * used for nesting types into another (composite) type; each object
     * describes one nested item (its type, name and offset)
189  */
190 struct cl_type_item {
191  /**
192  * type of the nested item
193  */
194  const struct cl_type *type;
195 
196  /**
197  * name of the nested item, mainly used by struct/union
198  * @attention structs and unions may have anonymous items
     * (NOTE(review): presumably NULL for an anonymous item - confirm)
199  */
200  const char *name;
201 
202  /**
203  * offset of the nested item [in bytes], valid only for struct/union
204  */
205  int offset;
206 };
207 
208 /**
209  * used for specialization of CL_TYPE_PTR, which can also represent both C++
210  * lvalue and rvalue references.
211  */
213  CL_PTR_TYPE_NOT_PTR, /**< to avoid accidental misuse bugs */
216  CL_PTR_TYPE_RVALUE_REF /**< used only when C++11 code is processed */
217 };
218 
219 /**
220  * type definition available for each operand. It can be also returned
221  * by cl_get_type_fnc_t function (useful to traverse type definition
222  * recursively).
223  */
224 struct cl_type {
225  /**
226  * type's unique ID
227  */
228  int uid;
229 
230  /**
231  * kind of type (enumeration)
232  */
234 
235  /**
236  * location of type's declaration (may or may not be valid)
237  */
238  struct cl_loc loc;
239 
240  /**
241  * scope of type's declaration
242  */
244 
245  /**
246  * type's name, or NULL for anonymous type
247  * @attention Even if the type is named, the name is not guaranteed to be
248  * unique; the C language does not guarantee that either. This field is
249  * suitable only for error/warning messages and debugging. Types must be
250  * distinguished by cl_type::uid and never by cl_type::name.
251  */
252  const char *name;
253 
254  /**
255  * type's sizeof
256  */
257  int size;
258 
259  /**
260  * count of nested types
261  *
262  * - @b 0 for elementary types
263  * - @b 1 for e.g. CL_TYPE_PTR and CL_TYPE_ARRAY
264  * - @b n for e.g. CL_TYPE_STRUCT and CL_TYPE_UNION
265  * - 0 < @b n for CL_TYPE_FNC
266  */
267  int item_cnt;
268 
269  /**
270  * array of nested types. Its size is exactly item_cnt.
271  */
273 
274  /**
275  * valid only for @b CL_TYPE_ARRAY
276  */
278 
279  /**
280  * makes sense mainly for integral types, but should be always initialized
281  */
283 
284  /**
285  * indicates whether the type has the 'const' qualifier set.
286  */
287  bool is_const;
288 
289  /**
290  * type of pointer (enumeration) to represent possible usage of C++
291  * references. It makes sense only when C++ analysis is also supported
292  * and when 'code' == CL_TYPE_PTR
293  */
295 };
296 
297 /**
298  * basic accessor enumeration
299  */
301  /**
302  * reference, in the C language: &
303  */
305 
306  /**
307  * dereference, in the C language: *
308  * @note operator -> is decomposed as (*ptr).item
309  * (CL_ACCESSOR_DEREF followed by CL_ACCESSOR_ITEM)
310  */
312 
313  /**
314  * array accessor, in the C language: []
315  */
317 
318  /**
319  * record accessor, in the C language: .
320  */
322 
323  /**
324  * offset accessor, in the C language: ((char *)ptr + N)
325  */
327 };
328 
329 /**
330  * accessor definition, available for @b some operands. Accessors can be
331  * chained as needed.
332  */
333 struct cl_accessor {
334  /**
335  * kind of accessor (enumeration)
336  */
338 
339  /**
340  * reference to type which the accessor is used for
341  */
342  struct cl_type *type;
343 
344  /**
345  * next accessor, or NULL if this is the last one (singly-linked list)
346  */
347  struct cl_accessor *next;
348 
349  /**
350  * some accessors have extra data
351  */
352  union {
353  /* CL_ACCESSOR_DEREF_ARRAY */
354  struct {
355  /**
356  * operand used as index to access array's item,
357  * in the C language: array[index]
358  */
359  struct cl_operand *index;
360  } array; /**< valid only for @b CL_ACCESSOR_DEREF_ARRAY */
361 
362  /* CL_ACCESSOR_ITEM */
363  struct {
364  /**
365  * index of the record's item to be accessed, numbering starts with zero
366  */
367  int id;
368  } item; /**< valid only for @b CL_ACCESSOR_ITEM */
369 
370  /* CL_ACCESSOR_OFFSET */
371  struct {
372  /**
373  * offset to move the address by
374  */
375  int off;
376  } offset;
377  } data;
378 };
379 
380 /**
381  * constant, in the C language terminology: literal
382  */
383 struct cl_cst {
384  /**
385  * kind of constant (enumeration)
386  */
388 
389  /**
390  * per constant type specific data
391  */
392  union {
393  /* CL_TYPE_FNC */
394  struct {
395  /**
396  * unique ID of function's declaration
397  */
398  int uid;
399 
400  /**
401  * name of the function
402  */
403  const char *name;
404 
405  /**
406  * true if the function is external for the analysed module
407  */
408  bool is_extern;
409 
410  struct cl_loc loc;
411 
412  } cst_fnc; /**< valid only for @b CL_TYPE_FNC */
413 
414  /* CL_TYPE_INT when is_unsigned is false */
415  struct {
416  long value;
417  } cst_int; /**< valid only for @b CL_TYPE_INT and !is_unsigned */
418 
419  /* CL_TYPE_INT when is_unsigned is true */
420  struct {
421  unsigned long value;
422  } cst_uint; /**< valid only for @b CL_TYPE_INT and is_unsigned */
423 
424  /* CL_TYPE_STRING */
425  struct {
426  const char *value;
427  } cst_string; /**< valid only for @b CL_TYPE_STRING */
428 
429  /* CL_TYPE_REAL */
430  struct {
431  double value;
432  } cst_real; /**< valid only for @b CL_TYPE_REAL */
433  } data;
434 };
435 
436 /**
437  * variable (local, global, fnc argument, register, etc.)
438  */
439 struct cl_var {
440  /**
441  * unique ID, given by compiler (or any cl decorator on the way from there)
442  */
443  int uid;
444 
445  /**
446  * name of the variable, valid only for program variables (NULL for
447  * artificial variables, created by compiler)
448  */
449  const char *name;
450 
451  /**
452  * true for auxiliary variables introduced by the compiler
453  */
455 
456  /* TODO: is_extern? */
457 
458  struct cl_loc loc;
459 
460  /**
461  * (possibly empty) chain of initializers
462  */
464 
465  /**
466  * true if the variable is initialized (does not imply NULL != initial)
467  */
469 
470  /**
471  * true if the variable is external (defined in another module)
472  */
473  bool is_extern;
474 };
475 
476 /**
477  * generic operand kind enumeration
478  */
480  /**
481  * there is NO operand
482  */
484 
485  /**
486  * constant operand, in the C language: @b literal
487  * @note this has nothing to do with the C/C++ keyword 'const'
488  */
490 
491  /**
492  * variable can represent a local/global program variable, or a function
493  * argument. Each variable has its unique ID. Optionally it has also some
494  * name, if the variable indeed exists in the analysed program (in contrast
495  * to artificial variables created by the compiler).
496  */
498 };
499 
500 /**
501  * generic operand. There is in fact no operand if code==CL_OPERAND_VOID. Each
502  * operand with extra data has its own member in the union data.
503  */
504 struct cl_operand {
505  /**
506  * kind of operand (enumeration)
507  */
509 
510  /**
511  * scope of the operand's validity
512  */
514 
515  /**
516  * type of operand, use type->uid to get its UID
517  */
518  struct cl_type *type;
519 
520  /**
521  * chain of accessors, or NULL if there are no accessors
522  */
524 
525  /**
526  * per operand type specific data
527  */
528  union {
529  /**
530  * valid only for @b CL_OPERAND_VAR
531  */
532  struct cl_var *var;
533 
534  /**
535  * valid only for @b CL_OPERAND_CST
536  */
537  struct cl_cst cst;
538  } data;
539 };
540 
541 /**
542  * basic instruction enumeration
543  */
544 enum cl_insn_e {
545  /**
546  * no instruction, used only internally for now
547  */
549 
550  /**
551  * @b goto instruction
552  */
554 
555  /**
556  * @b if (EXPR) instruction
557  */
559 
560  /**
561  * @b return instruction
562  */
564 
565  /**
566  * this follows each call of a function declared with attribute @b noreturn
567  */
569 
570  /**
571  * @b unary (lhs + 1) operation
572  */
574 
575  /**
576  * @b binary (lhs + 2) operation
577  */
579 
580  /**
581  * this code is never emitted by cl_insn callback
582  */
584 
585  /**
586  * this code is never emitted by cl_insn callback
587  */
589 
590  /**
591  * label
592  */
594 };
595 
596 /**
597  * return true if the given code denotes a terminal instruction
     * @param code kind of instruction to classify
     * @return true for a terminal instruction, false otherwise
     * @note NOTE(review): the TERM_INSN rule in the cl_code_listener grammar
     * lists CL_INSN_JMP, CL_INSN_COND, CL_INSN_RET, CL_INSN_ABORT and the
     * switch instruction as terminal; the exact set accepted here is decided
     * by the implementation, which is not part of this header - confirm
598  */
599 bool cl_is_term_insn(enum cl_insn_e code);
600 
601 /**
602  * unary operation's enumeration
603  */
604 enum cl_unop_e {
605  /**
606  * simple assignment
607  */
609 
610  /**
611  * truth not, in the C language: !
612  */
614 
615  /**
616  * bitwise not, in the C language: ~
617  */
619 
620  /**
621  * unary minus
622  */
624 
625  /**
626  * absolute value, applicable on numbers only
627  */
629 
630  /**
631  * conversion of an integer to a real
632  */
634 
635  /* TODO */
636 };
637 
638 /**
639  * binary operation's enumeration
640  */
642  /* comparison */
649 
650  /* logical */
654 
655  /* arithmetic */
665 
666  /* pointer arithmetic */
668 
669  /* bitwise */
673 
674  /* shift and rotation */
679 
680  /* TODO */
681 
682  /* unknown binary operation */
684 };
685 
686 /**
687  * single instruction definition
688  */
689 struct cl_insn {
690  /**
691  * kind of instruction (enumeration)
692  */
694 
695  /**
696  * location of the instruction's occurrence
697  */
698  struct cl_loc loc;
699 
700  /* instruction specific data */
701  union {
702 
703  /* CL_INSN_JMP */
704  struct {
705  const char *label;
706  } insn_jmp; /**< valid only for @b CL_INSN_JMP */
707 
708  /* CL_INSN_COND */
709  struct {
710  const struct cl_operand *src;
711  const char *then_label;
712  const char *else_label;
713  } insn_cond; /**< valid only for @b CL_INSN_COND */
714 
715  /* CL_INSN_RET */
716  struct {
717  const struct cl_operand *src;
718  } insn_ret; /**< valid only for @b CL_INSN_RET */
719 
720  /* CL_INSN_UNOP */
721  struct {
723  const struct cl_operand *dst;
724  const struct cl_operand *src;
725  } insn_unop; /**< valid only for @b CL_INSN_UNOP */
726 
727  /* CL_INSN_BINOP */
728  struct {
730  const struct cl_operand *dst;
731  const struct cl_operand *src1;
732  const struct cl_operand *src2;
733  } insn_binop; /**< valid only for @b CL_INSN_BINOP */
734 
735  /* CL_INSN_LABEL */
736  struct {
737  const char *name;
738  } insn_label; /**< valid only for @b CL_INSN_LABEL */
739 
740  } data;
741 };
742 
743 /**
744  * initializer, used mainly for global/static variables
745  */
747  struct cl_insn insn;
749 };
750 
751 /**
752  * listener object - the core part of this interface
753  *
754  * @verbatim
755  * It accepts a context-free language defined by substitution to regex:
756  *
757  * (file_open FILE_CONTENT file_close)* acknowledge destroy
758  *
759  *
760  * FILE_CONTENT is defined by substitution to regex:
761  *
762  * fnc_open (fnc_arg_decl)* FNC_BODY fnc_close
763  *
764  *
765  * FNC_BODY is defined by substitution to regex:
766  *
767  * FNC_ENTRY (bb_open (NONTERM_INSN)* TERM_INSN)*
768  *
769  *
770  * FNC_ENTRY is defined as:
771  *
772  * insn{CL_INSN_JMP}
773  *
774  *
775  * NONTERM_INSN is defined as:
776  *
777  * INSN_CALL | insn{CL_INSN_UNOP, CL_INSN_BINOP}
778  *
779  *
780  * TERM_INSN is defined as:
781  *
782  * insn{CL_INSN_JMP, CL_INSN_COND, CL_INSN_RET, CL_INSN_ABORT} | INSN_SWITCH
783  *
784  *
785  * INSN_CALL is defined by regex:
786  *
787  * insn_call_open (insn_call_arg)* insn_call_close
788  *
789  *
790  * INSN_SWITCH is defined by regex:
791  *
792  * insn_switch_open (insn_switch_case)* insn_switch_close
793  *
794  * @endverbatim
795  */
797  /**
798  * listener's internal data. Do not use this member.
799  */
800  void *data;
801 
802  /**
803  * file open callback
804  * @param self Pointer to cl_code_listener object.
805  * @param file_name Zero-terminated string with file name being opened.
806  */
807  void (*file_open)(
808  struct cl_code_listener *self,
809  const char *file_name);
810 
811  /**
812  * file close callback
813  * @param self Pointer to cl_code_listener object.
814  */
815  void (*file_close)(
816  struct cl_code_listener *self);
817 
818  /**
819  * function definition initiation callback
820  * @param self Pointer to cl_code_listener object.
821  * @param fcn An operand used as function declaration (without args).
822  */
823  void (*fnc_open)(
824  struct cl_code_listener *self,
825  const struct cl_operand *fcn);
826 
827  /**
828  * function argument declaration callback
829  * @param self Pointer to cl_code_listener object.
830  * @param arg_id Position of the argument being specified.
831  * @param arg_src Function argument given as operand.
832  */
833  void (*fnc_arg_decl)(
834  struct cl_code_listener *self,
835  int arg_id,
836  const struct cl_operand *arg_src);
837 
838  /**
839  * function definition complete callback
840  * @param self Pointer to cl_code_listener object.
841  */
842  void (*fnc_close)(
843  struct cl_code_listener *self);
844 
845  /**
846  * basic block initiation callback
847  * @param self Pointer to cl_code_listener object.
848  * @param label Zero-terminated string containing label (and thus BB) name
849  */
850  void (*bb_open)(
851  struct cl_code_listener *self,
852  const char *label);
853 
854  /**
855  * one-shot instruction callback
856  * @param self Pointer to cl_code_listener object.
857  * @param insn Instruction definition.
858  */
859  void (*insn)(
860  struct cl_code_listener *self,
861  const struct cl_insn *insn);
862 
863  /**
864  * CL_INSN_CALL initiation callback
865  * @param self Pointer to cl_code_listener object.
866  * @param loc location of the call instruction
867  * @param dst An operand taking fnc's return value, may be CL_OPERAND_VOID
868  * @param fnc An operand used as function to call (not necessarily constant)
869  */
870  void (*insn_call_open)(
871  struct cl_code_listener *self,
872  const struct cl_loc *loc,
873  const struct cl_operand *dst,
874  const struct cl_operand *fnc);
875 
876  /**
877  * CL_INSN_CALL per argument callback
878  * @param self Pointer to cl_code_listener object.
879  * @param arg_id Position of the argument being specified.
880  * @param arg_src Call argument given as operand.
881  */
882  void (*insn_call_arg)(
883  struct cl_code_listener *self,
884  int arg_id,
885  const struct cl_operand *arg_src);
886 
887  /**
888  * CL_INSN_CALL complete callback
889  * @param self Pointer to cl_code_listener object.
890  */
892  struct cl_code_listener *self);
893 
894  /**
895  * CL_INSN_SWITCH initiation callback
896  * @param self Pointer to cl_code_listener object.
897  * @param loc location of the switch instruction
898  * @param src An operand used as switch source.
899  */
901  struct cl_code_listener *self,
902  const struct cl_loc *loc,
903  const struct cl_operand *src);
904 
905  /**
906  * CL_INSN_SWITCH per case callback
907  *
908  * For a singleton case branch, @b val_lo and @b val_hi hold the same
909  * integral constants (literals). Otherwise, an @b integral @b range of
910  * constants can be specified. @b Default @b target is encoded as
911  * (CL_OPERAND_VOID == val_lo->code && CL_OPERAND_VOID == val_hi->code).
912  * @param self Pointer to cl_code_listener object.
913  * @param loc location of the case
914  * @param val_lo Begin of the range for given case.
915  * @param val_hi End of the range for given case.
916  * @param label Zero-terminated string containing label name
917  */
919  struct cl_code_listener *self,
920  const struct cl_loc *loc,
921  const struct cl_operand *val_lo,
922  const struct cl_operand *val_hi,
923  const char *label);
924 
925  /**
926  * CL_INSN_SWITCH complete callback
927  * @param self Pointer to cl_code_listener object.
928  */
930  struct cl_code_listener *self);
931 
932  /**
933  * acknowledge that all regular callbacks have been sent and are supposed
934  * to be valid.
935  */
936  void (*acknowledge)(
937  struct cl_code_listener *self);
938 
939  /**
940  * destroy code listener object
941  * @param self Pointer to cl_code_listener object.
942  */
943  void (*destroy)(
944  struct cl_code_listener *self);
945 };
946 
947 /**
948  * create cl_code_listener object
949  * @param config_string determines the type and attributes of the listener.
     * @return pointer to the created cl_code_listener object
     * (NOTE(review): the result for a config_string that cannot be parsed is
     * not documented in this header - confirm against the implementation)
950  * @note config_string is currently undocumented. You can look at slplug.c for
951  * examples or turn on verbose output to see how it is parsed.
952  */
953 struct cl_code_listener* cl_code_listener_create(const char *config_string);
954 
955 /**
956  * create cl_code_listener object for grouping other cl_code_listener objects
957  * @return Returns a heap-allocated cl_code_listener object which does nothing.
     * @see cl_chain_append() for adding listeners to the returned chain
958  */
959 struct cl_code_listener* cl_chain_create(void);
960 
961 /**
962  * append cl_code_listener object to chain
963  * @param chain Object returned by cl_chain_create() function.
964  * @param listener Object to be added to the chain.
965  * @note Listener objects are notified in the same order as they are added.
966  */
967 void cl_chain_append(
968  struct cl_code_listener *chain,
969  struct cl_code_listener *listener);
970 
971 #ifdef __cplusplus
972 }
973 #endif
974 
975 #endif /* H_GUARD_CODE_LISTENER_H */