Code Listener  [unstable] git snapshot
code_listener.h
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2009 Kamil Dudka <kdudka@redhat.com>
3  *
4  * This file is part of predator.
5  *
6  * predator is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * any later version.
10  *
11  * predator is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with predator. If not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #ifndef H_GUARD_CODE_LISTENER_H
21 #define H_GUARD_CODE_LISTENER_H
22 
23 #include <stdbool.h>
24 
25 #ifdef __cplusplus
26 extern "C" {
27 #endif
28 
29 /**
30  * @file code_listener.h
31  * @b code @b listener interface - useful to write compiler plug-ins, etc.
32  */
33 
34 /**
35  * generic location info.
36  * @note taken from gcc's expanded_location defined in <gcc/input.h>
37  */
38 struct cl_loc {
39  /**
40  * input file as zero-terminated string. If NULL, the location info is
41  * either invalid or not available.
42  */
43  const char *file;
44 
45  /**
46  * line number in the input file (starting at 1).
47  */
48  int line;
49 
50  /**
51  * column number in the input file (starting at 1).
52  */
53  int column;
54 
55  /**
56  * true, if the token is located in a system header.
57  * @note not used for now
58  */
59  bool sysp;
60 };
61 
62 /**
63  * nullified location info, which means "unknown location"
64  */
65 extern const struct cl_loc cl_loc_unknown;
66 
67 /**
68  * return loc if it is a valid location info; return fallback otherwise
69  */
70 const struct cl_loc* cl_loc_fallback(
71  const struct cl_loc *loc,
72  const struct cl_loc *fallback);
73 
74 /**
75  * type of function writing a message
76  * @param msg zero-terminated string to write
77  */
78 typedef void (*cl_print_fnc_t)(const char *msg);
79 
80 /**
81  * collection of call-backs for writing various types of messages
82  */
83 struct cl_init_data {
84  /**
85  * function to write debug messages (perhaps empty function when not
86  * debugging?)
87  */
89 
90  /**
91  * function to write a warning message (may or may not be related to
92  * location currently being processed)
93  */
95 
96  /**
97  * function to write an error message (may or may not be related to
98  * location currently being processed)
99  */
101 
102  /**
103  * function to write an information message (which usually gives some extra
104  * info related to previous warning/error message)
105  */
107 
108  /**
109  * function to display critical error and then terminate the application
110  * @attention This function should never return (TODO: attribute?).
111  */
113 
114  /**
115  * debugging level: the greater the number, the more output; 0 means no debugging
116  */
118 };
119 
120 /**
121  * global initialization - it sets message printing call-backs
122  * @param init_data - collection of call-backs to set (none of them can be NULL)
123  * @note You should call cl_global_cleanup() to free resources before exit.
124  */
125 void cl_global_init(struct cl_init_data *init_data);
126 
127 /**
128  * global initialization - it sets built-in functions to print messages
129  * @param app_name - name of the application which appears in all messages. If
130  * NULL is given, no application name will be printed.
131  * @param debug_level - debugging level: the greater the number, the more output;
132  * 0 means no debugging
133  * @note You should call cl_global_cleanup() to free resources before exit.
134  */
136  const char *app_name,
137  int debug_level);
138 
139 /**
140  * free resources allocated by cl_global_init() or cl_global_init_defaults()
141  */
142 void cl_global_cleanup(void);
143 
144 /**
145  * symbol scope enumeration (linearly ordered)
146  */
148  /**
149  * scope is unlimited
150  */
152 
153  /**
154  * scope is limited to current source file
155  */
157 
158  /**
159  * scope is limited to currently processed function
160  */
162 };
163 
164 /**
165  * basic type enumeration
166  */
167 enum cl_type_e {
176  CL_TYPE_CHAR, /**< not used by the gcc plug-in at the moment */
180 
181  /**
182  * used only by cl_cst
183  */
185 };
186 
187 /**
188  * used for nesting types into another (composite) type
189  */
190 struct cl_type_item {
191  /**
192  * nested type
193  */
194  const struct cl_type *type;
195 
196  /**
197  * name of the nested item, mainly used by struct/union
198  * @attention structs and unions may have anonymous items
199  */
200  const char *name;
201 
202  /**
203  * offset of the nested item [in bytes], valid only for struct/union
204  */
205  int offset;
206 };
207 
208 /**
209  * type definition available for each operand. It can also be returned
210  * by a cl_get_type_fnc_t function (useful for traversing a type definition
211  * recursively).
212  */
213 struct cl_type {
214  /**
215  * type's unique ID
216  */
217  int uid;
218 
219  /**
220  * kind of type (enumeration)
221  */
223 
224  /**
225  * location of type's declaration (may or may not be valid)
226  */
227  struct cl_loc loc;
228 
229  /**
230  * scope of type's declaration
231  */
233 
234  /**
235  * type's name, or NULL for anonymous type
236  * @attention Even if the type is named, the name is not guaranteed to be
237  * unique; the C language does not guarantee anything like that either. This
238  * field is suitable only for error/warning messages and debugging. Types
239  * are distinguishable by cl_type::uid and never ever by cl_type::name.
240  */
241  const char *name;
242 
243  /**
244  * type's sizeof
245  */
246  int size;
247 
248  /**
249  * count of nested types
250  *
251  * - @b 0 for elementary types
252  * - @b 1 for e.g. CL_TYPE_PTR and CL_TYPE_ARRAY
253  * - @b n for e.g. CL_TYPE_STRUCT and CL_TYPE_UNION
254  * - 0 < @b n for CL_TYPE_FNC
255  */
256  int item_cnt;
257 
258  /**
259  * array of nested types. Its size is exactly item_cnt.
260  */
262 
263  /**
264  * valid only for @b CL_TYPE_ARRAY
265  */
267 
268  /**
269  * makes sense mainly for integral types, but should be always initialized
270  */
272 };
273 
274 /**
275  * basic accessor enumeration
276  */
278  /**
279  * reference, in the C language: &
280  */
282 
283  /**
284  * dereference, in the C language: *
285  * @note operator -> is decomposed as (*ptr).item
286  * (CL_ACCESSOR_DEREF followed by CL_ACCESSOR_ITEM)
287  */
289 
290  /**
291  * array accessor, in the C language: []
292  */
294 
295  /**
296  * record accessor, in the C language: .
297  */
299 
300  /**
301  * offset accessor, in the C language: ((char *)ptr + N)
302  */
304 };
305 
306 /**
307  * accessor definition, available for @b some operands. Accessors can be
308  * chained as needed.
309  */
310 struct cl_accessor {
311  /**
312  * kind of accessor (enumeration)
313  */
315 
316  /**
317  * reference to type which the accessor is used for
318  */
319  struct cl_type *type;
320 
321  /**
322  * next accessor, or NULL if this is the last one (singly-linked list)
323  */
324  struct cl_accessor *next;
325 
326  /**
327  * extra data carried only by some kinds of accessor
328  */
329  union {
330  /* CL_ACCESSOR_DEREF_ARRAY */
331  struct {
332  /**
333  * operand used as index to access array's item,
334  * in the C language: array[index]
335  */
336  struct cl_operand *index;
337  } array; /**< valid only for @b CL_ACCESSOR_DEREF_ARRAY */
338 
339  /* CL_ACCESSOR_ITEM */
340  struct {
341  /**
342  * index of the record's item to be accessed (zero-based)
343  */
344  int id;
345  } item; /**< valid only for @b CL_ACCESSOR_ITEM */
346 
347  /* CL_ACCESSOR_OFFSET */
348  struct {
349  /**
350  * offset to move the address by, in the C language: ((char *)ptr + off)
351  */
352  int off;
353  } offset; /**< valid only for @b CL_ACCESSOR_OFFSET */
354  } data;
355 };
356 
357 /**
358  * constant, in the C language terminology: literal
359  */
360 struct cl_cst {
361  /**
362  * kind of constant (enumeration)
363  */
365 
366  /**
367  * per constant type specific data
368  */
369  union {
370  /* CL_TYPE_FNC */
371  struct {
372  /**
373  * unique ID of function's declaration
374  */
375  int uid;
376 
377  /**
378  * name of the function
379  */
380  const char *name;
381 
382  /**
383  * true if the function is external for the analysed module
384  */
385  bool is_extern;
386 
387  struct cl_loc loc;
388 
389  } cst_fnc; /**< valid only for @b CL_TYPE_FNC */
390 
391  /* CL_TYPE_INT when is_unsigned is false */
392  struct {
393  long value;
394  } cst_int; /**< valid only for @b CL_TYPE_INT and !is_unsigned */
395 
396  /* CL_TYPE_INT when is_unsigned is true */
397  struct {
398  unsigned long value;
399  } cst_uint; /**< valid only for @b CL_TYPE_INT and is_unsigned */
400 
401  /* CL_TYPE_STRING */
402  struct {
403  const char *value;
404  } cst_string; /**< valid only for @b CL_TYPE_STRING */
405 
406  /* CL_TYPE_REAL */
407  struct {
408  double value;
409  } cst_real; /**< valid only for @b CL_TYPE_REAL */
410  } data;
411 };
412 
413 /**
414  * variable (local, global, fnc argument, register, etc.)
415  */
416 struct cl_var {
417  /**
418  * unique ID, given by compiler (or any cl decorator on the way from there)
419  */
420  int uid;
421 
422  /**
423  * name of the variable, valid only for program variables (NULL for
424  * artificial variables, created by compiler)
425  */
426  const char *name;
427 
428  /**
429  * true for auxiliary variables introduced by the compiler
430  */
432 
433  /* NOTE: the is_extern flag asked for by an earlier TODO is declared below */
434 
435  struct cl_loc loc;
436 
437  /**
438  * (possibly empty) chain of initializers
439  */
441 
442  /**
443  * true if the variable is initialized (does not imply NULL != initial)
444  */
446 
447  /**
448  * true if the variable is external (defined in another module)
449  */
450  bool is_extern;
451 };
452 
453 /**
454  * generic operand kind enumeration
455  */
457  /**
458  * there is NO operand
459  */
461 
462  /**
463  * constant operand, in the C language: @b literal
464  * @note this has nothing to do with the C/C++ keyword 'const'
465  */
467 
468  /**
469  * variable can represent a local/global program variable, or a function
470  * argument. Each variable has its unique ID. Optionally it has also some
471  * name, if the variable indeed exists in the analysed program (in contrast
472  * to artificial variables created by the compiler).
473  */
475 };
476 
477 /**
478  * generic operand. There is in fact no operand if code==CL_OPERAND_VOID. Each
479  * operand with extra data has its own member in the union data.
480  */
481 struct cl_operand {
482  /**
483  * kind of operand (enumeration)
484  */
486 
487  /**
488  * scope of the operand's validity
489  */
491 
492  /**
493  * type of operand, use type->uid to get its UID
494  */
495  struct cl_type *type;
496 
497  /**
498  * chain of accessors, or NULL if there are no accessors
499  */
501 
502  /**
503  * per operand type specific data
504  */
505  union {
506  /**
507  * valid only for @b CL_OPERAND_VAR
508  */
509  struct cl_var *var;
510 
511  /**
512  * valid only for @b CL_OPERAND_CST
513  */
514  struct cl_cst cst;
515  } data;
516 };
517 
518 /**
519  * basic instruction enumeration
520  */
521 enum cl_insn_e {
522  /**
523  * no instruction, used only internally for now
524  */
526 
527  /**
528  * @b goto instruction
529  */
531 
532  /**
533  * @b if (EXPR) instruction
534  */
536 
537  /**
538  * @b return instruction
539  */
541 
542  /**
543  * this follows each call of a function declared with attribute @b noreturn
544  */
546 
547  /**
548  * @b unary operation (lhs + 1 source operand)
549  */
551 
552  /**
553  * @b binary operation (lhs + 2 source operands)
554  */
556 
557  /**
558  * this code is never emitted by cl_insn callback
559  */
561 
562  /**
563  * this code is never emitted by cl_insn callback
564  */
566 
567  /**
568  * label
569  */
571 };
572 
573 /**
574  * return true if the given code denotes a terminal instruction
575  */
576 bool cl_is_term_insn(enum cl_insn_e code);
577 
578 /**
579  * unary operation's enumeration
580  */
581 enum cl_unop_e {
582  /**
583  * simple assignment
584  */
586 
587  /**
588  * truth not, in the C language: !
589  */
591 
592  /**
593  * bitwise not, in the C language: ~
594  */
596 
597  /**
598  * unary minus
599  */
601 
602  /**
603  * absolute value, applicable on numbers only
604  */
606 
607  /**
608  * conversion of an integer to a real
609  */
611 
612  /* TODO */
613 };
614 
615 /**
616  * binary operation's enumeration
617  */
619  /* comparison */
626 
627  /* logical */
631 
632  /* arithmetic */
642 
643  /* pointer arithmetic */
645 
646  /* bitwise */
650 
651  /* shift and rotation */
656 
657  /* TODO */
658 };
659 
660 /**
661  * single instruction definition
662  */
663 struct cl_insn {
664  /**
665  * kind of instruction (enumeration)
666  */
668 
669  /**
670  * location of the instruction's occurrence
671  */
672  struct cl_loc loc;
673 
674  /* instruction specific data */
675  union {
676 
677  /* CL_INSN_JMP */
678  struct {
679  const char *label;
680  } insn_jmp; /**< valid only for @b CL_INSN_JMP */
681 
682  /* CL_INSN_COND */
683  struct {
684  const struct cl_operand *src;
685  const char *then_label;
686  const char *else_label;
687  } insn_cond; /**< valid only for @b CL_INSN_COND */
688 
689  /* CL_INSN_RET */
690  struct {
691  const struct cl_operand *src;
692  } insn_ret; /**< valid only for @b CL_INSN_RET */
693 
694  /* CL_INSN_UNOP */
695  struct {
697  const struct cl_operand *dst;
698  const struct cl_operand *src;
699  } insn_unop; /**< valid only for @b CL_INSN_UNOP */
700 
701  /* CL_INSN_BINOP */
702  struct {
704  const struct cl_operand *dst;
705  const struct cl_operand *src1;
706  const struct cl_operand *src2;
707  } insn_binop; /**< valid only for @b CL_INSN_BINOP */
708 
709  /* CL_INSN_LABEL */
710  struct {
711  const char *name;
712  } insn_label; /**< valid only for @b CL_INSN_LABEL */
713 
714  } data;
715 };
716 
717 /**
718  * initializer, used mainly for global/static variables
719  */
721  struct cl_insn insn;
723 };
724 
725 /**
726  * listener object - the core part of this interface
727  *
728  * @verbatim
729  * It accepts a context-free language defined by substitution to regex:
730  *
731  * (file_open FILE_CONTENT file_close)* acknowledge destroy
732  *
733  *
734  * FILE_CONTENT is defined by substitution to regex:
735  *
736  * fnc_open (fnc_arg_decl)* FNC_BODY fnc_close
737  *
738  *
739  * FNC_BODY is defined by substitution to regex:
740  *
741  * FNC_ENTRY (bb_open (NONTERM_INSN)* TERM_INSN)*
742  *
743  *
744  * FNC_ENTRY is defined as:
745  *
746  * insn{CL_INSN_JMP}
747  *
748  *
749  * NONTERM_INSN is defined as:
750  *
751  * INSN_CALL | insn{CL_INSN_UNOP, CL_INSN_BINOP}
752  *
753  *
754  * TERM_INSN is defined as:
755  *
756  * insn{CL_INSN_JMP, CL_INSN_COND, CL_INSN_RET, CL_INSN_ABORT} | INSN_SWITCH
757  *
758  *
759  * INSN_CALL is defined by regex:
760  *
761  * insn_call_open (insn_call_arg)* insn_call_close
762  *
763  *
764  * INSN_SWITCH is defined by regex:
765  *
766  * insn_switch_open (insn_switch_case)* insn_switch_close
767  *
768  * @endverbatim
769  */
771  /**
772  * listener's internal data. Do not use this member.
773  */
774  void *data;
775 
776  /**
777  * file open callback
778  * @param self Pointer to cl_code_listener object.
779  * @param file_name Zero-terminated string with file name being opened.
780  */
781  void (*file_open)(
782  struct cl_code_listener *self,
783  const char *file_name);
784 
785  /**
786  * file close callback
787  * @param self Pointer to cl_code_listener object.
788  */
789  void (*file_close)(
790  struct cl_code_listener *self);
791 
792  /**
793  * function definition initiation callback
794  * @param self Pointer to cl_code_listener object.
795  * @param fcn An operand used as function declaration (without args).
796  */
797  void (*fnc_open)(
798  struct cl_code_listener *self,
799  const struct cl_operand *fcn);
800 
801  /**
802  * function argument declaration callback
803  * @param self Pointer to cl_code_listener object.
804  * @param arg_id Position of the argument being specified.
805  * @param arg_src Function argument given as operand.
806  */
807  void (*fnc_arg_decl)(
808  struct cl_code_listener *self,
809  int arg_id,
810  const struct cl_operand *arg_src);
811 
812  /**
813  * function definition complete callback
814  * @param self Pointer to cl_code_listener object.
815  */
816  void (*fnc_close)(
817  struct cl_code_listener *self);
818 
819  /**
820  * basic block initiation callback
821  * @param self Pointer to cl_code_listener object.
822  * @param label Zero-terminated string containing label (and thus BB) name
823  */
824  void (*bb_open)(
825  struct cl_code_listener *self,
826  const char *label);
827 
828  /**
829  * one-shot instruction callback
830  * @param self Pointer to cl_code_listener object.
831  * @param insn Instruction definition.
832  */
833  void (*insn)(
834  struct cl_code_listener *self,
835  const struct cl_insn *insn);
836 
837  /**
838  * CL_INSN_CALL initiation callback
839  * @param self Pointer to cl_code_listener object.
840  * @param loc location of the call instruction (presumably - TODO confirm)
841  * @param dst An operand taking fnc's return value, may be CL_OPERAND_VOID
842  * @param fnc An operand used as function to call (not necessarily constant)
843  */
844  void (*insn_call_open)(
845  struct cl_code_listener *self,
846  const struct cl_loc *loc,
847  const struct cl_operand *dst,
848  const struct cl_operand *fnc);
849 
850  /**
851  * CL_INSN_CALL per argument callback
852  * @param self Pointer to cl_code_listener object.
853  * @param arg_id Position of the argument being specified.
854  * @param arg_src Call argument given as operand.
855  */
856  void (*insn_call_arg)(
857  struct cl_code_listener *self,
858  int arg_id,
859  const struct cl_operand *arg_src);
860 
861  /**
862  * CL_INSN_CALL complete callback
863  * @param self Pointer to cl_code_listener object.
864  */
866  struct cl_code_listener *self);
867 
868  /**
869  * CL_INSN_SWITCH initiation callback
870  * @param self Pointer to cl_code_listener object.
871  * @param loc location of the switch instruction (presumably - TODO confirm)
872  * @param src An operand used as switch source.
873  */
875  struct cl_code_listener *self,
876  const struct cl_loc *loc,
877  const struct cl_operand *src);
878 
879  /**
880  * CL_INSN_SWITCH per case callback
881  *
882  * For a singleton case branch, @b val_lo and @b val_hi hold the same
883  * integral constants (literals). Otherwise, an @b integral @b range of
884  * constants can be specified. @b Default @b target is encoded as
885  * (CL_OPERAND_VOID == val_lo->code && CL_OPERAND_VOID == val_hi->code).
886  * @param self Pointer to cl_code_listener object.
887  * @param loc location of the case (presumably - TODO confirm)
888  * @param val_lo Begin of the range for given case.
889  * @param val_hi End of the range for given case.
890  * @param label Zero-terminated string containing label name
891  */
893  struct cl_code_listener *self,
894  const struct cl_loc *loc,
895  const struct cl_operand *val_lo,
896  const struct cl_operand *val_hi,
897  const char *label);
898 
899  /**
900  * CL_INSN_SWITCH complete callback
901  * @param self Pointer to cl_code_listener object.
902  */
904  struct cl_code_listener *self);
905 
906  /**
907  * acknowledge that all regular callbacks have been sent and are supposed
908  * to be valid.
909  */
910  void (*acknowledge)(
911  struct cl_code_listener *self);
912 
913  /**
914  * destroy code listener object
915  * @param self Pointer to cl_code_listener object.
916  */
917  void (*destroy)(
918  struct cl_code_listener *self);
919 };
920 
921 /**
922  * create cl_code_listener object
923  * @param config_string determines the type and attributes of the listener.
924  * @note config_string is currently undocumented. You can look at slplug.c for
925  * examples or turn on verbose output to see how it is parsed.
926  */
927 struct cl_code_listener* cl_code_listener_create(const char *config_string);
928 
929 /**
930  * create cl_code_listener object for grouping other cl_code_listener objects
931  * @return Returns a heap-allocated cl_code_listener object which does nothing.
932  */
933 struct cl_code_listener* cl_chain_create(void);
934 
935 /**
936  * append cl_code_listener object to chain
937  * @param chain Object returned by cl_chain_create() function.
938  * @param listener Object to be added to the chain.
939  * @note Listener objects are notified in the same order as they are added.
940  */
941 void cl_chain_append(
942  struct cl_code_listener *chain,
943  struct cl_code_listener *listener);
944 
945 #ifdef __cplusplus
946 }
947 #endif
948 
949 #endif /* H_GUARD_CODE_LISTENER_H */