Commit | Line | Data |
---|---|---|
6badd1a5 | 1 | package Opcode; |
2 | ||
3b825e41 | 3 | use 5.006_001; |
6badd1a5 | 4 | |
b75c8c73 MS |
5 | use strict; |
6 | ||
4eb3f1b8 | 7 | our($VERSION, @ISA, @EXPORT_OK); |
6badd1a5 | 8 | |
a7fd8ef6 | 9 | $VERSION = "1.25"; |
6badd1a5 | 10 | |
6badd1a5 | 11 | use Carp; |
12 | use Exporter (); | |
da4061d3 | 13 | use XSLoader; |
6badd1a5 | 14 | |
15 | BEGIN { | |
b75c8c73 | 16 | @ISA = qw(Exporter); |
6badd1a5 | 17 | @EXPORT_OK = qw( |
18 | opset ops_to_opset | |
19 | opset_to_ops opset_to_hex invert_opset | |
20 | empty_opset full_opset | |
21 | opdesc opcodes opmask define_optag | |
22 | opmask_add verify_opset opdump | |
23 | ); | |
24 | } | |
25 | ||
68dc0745 | 26 | sub opset (;@); |
27 | sub opset_to_hex ($); | |
28 | sub opdump (;$); | |
6badd1a5 | 29 | use subs @EXPORT_OK; |
30 | ||
da4061d3 | 31 | XSLoader::load(); |
6badd1a5 | 32 | |
33 | _init_optags(); | |
34 | ||
68dc0745 | 35 | sub ops_to_opset { opset @_ } # alias for old name |
6badd1a5 | 36 | |
37 | sub opset_to_hex ($) { | |
38 | return "(invalid opset)" unless verify_opset($_[0]); | |
39 | unpack("h*",$_[0]); | |
40 | } | |
41 | ||
42 | sub opdump (;$) { | |
43 | my $pat = shift; | |
44 | # handy utility: perl -MOpcode=opdump -e 'opdump File' | |
45 | foreach(opset_to_ops(full_opset)) { | |
46 | my $op = sprintf " %12s %s\n", $_, opdesc($_); | |
47 | next if defined $pat and $op !~ m/$pat/i; | |
48 | print $op; | |
49 | } | |
50 | } | |
51 | ||
52 | ||
53 | ||
54 | sub _init_optags { | |
55 | my(%all, %seen); | |
56 | @all{opset_to_ops(full_opset)} = (); # keys only | |
57 | ||
7a57407b | 58 | local($_); |
6badd1a5 | 59 | local($/) = "\n=cut"; # skip to optags definition section |
60 | <DATA>; | |
61 | $/ = "\n="; # now read in 'pod section' chunks | |
62 | while(<DATA>) { | |
63 | next unless m/^item\s+(:\w+)/; | |
64 | my $tag = $1; | |
65 | ||
66 | # Split into lines, keep only indented lines | |
67 | my @lines = grep { m/^\s/ } split(/\n/); | |
be1d34d7 | 68 | foreach (@lines) { s/(?:\t|--).*// } # delete comments |
6badd1a5 | 69 | my @ops = map { split ' ' } @lines; # get op words |
70 | ||
71 | foreach(@ops) { | |
72 | warn "$tag - $_ already tagged in $seen{$_}\n" if $seen{$_}; | |
73 | $seen{$_} = $tag; | |
74 | delete $all{$_}; | |
75 | } | |
76 | # opset will croak on invalid names | |
77 | define_optag($tag, opset(@ops)); | |
78 | } | |
79 | close(DATA); | |
80 | warn "Untagged opnames: ".join(' ',keys %all)."\n" if %all; | |
81 | } | |
82 | ||
83 | ||
84 | 1; | |
85 | ||
86 | __DATA__ | |
87 | ||
88 | =head1 NAME | |
89 | ||
90 | Opcode - Disable named opcodes when compiling perl code | |
91 | ||
92 | =head1 SYNOPSIS | |
93 | ||
94 | use Opcode; | |
95 | ||
96 | ||
97 | =head1 DESCRIPTION | |
98 | ||
99 | Perl code is always compiled into an internal format before execution. | |
100 | ||
101 | Evaluating perl code (e.g. via "eval" or "do 'file'") causes | |
102 | the code to be compiled into an internal format and then, | |
103 | provided there was no error in the compilation, executed. | |
104 | The internal format is based on many distinct I<opcodes>. | |
105 | ||
106 | By default no opmask is in effect and any code can be compiled. | |
107 | ||
108 | The Opcode module allow you to define an I<operator mask> to be in | |
109 | effect when perl I<next> compiles any code. Attempting to compile code | |
110 | which contains a masked opcode will cause the compilation to fail | |
111 | with an error. The code will not be executed. | |
112 | ||
113 | =head1 NOTE | |
114 | ||
115 | The Opcode module is not usually used directly. See the ops pragma and | |
116 | Safe modules for more typical uses. | |
117 | ||
118 | =head1 WARNING | |
119 | ||
120 | The authors make B<no warranty>, implied or otherwise, about the | |
121 | suitability of this software for safety or security purposes. | |
122 | ||
123 | The authors shall not in any case be liable for special, incidental, | |
124 | consequential, indirect or other similar damages arising from the use | |
125 | of this software. | |
126 | ||
127 | Your mileage will vary. If in any doubt B<do not use it>. | |
128 | ||
129 | ||
130 | =head1 Operator Names and Operator Lists | |
131 | ||
132 | The canonical list of operator names is the contents of the array | |
4369b173 | 133 | PL_op_name defined and initialised in file F<opcode.h> of the Perl |
6badd1a5 | 134 | source distribution (and installed into the perl library). |
135 | ||
136 | Each operator has both a terse name (its opname) and a more verbose or | |
137 | recognisable descriptive name. The opdesc function can be used to | |
138 | return a list of descriptions for a list of operators. | |
139 | ||
140 | Many of the functions and methods listed below take a list of | |
141 | operators as parameters. Most operator lists can be made up of several | |
142 | types of element. Each element can be one of | |
143 | ||
144 | =over 8 | |
145 | ||
146 | =item an operator name (opname) | |
147 | ||
148 | Operator names are typically small lowercase words like enterloop, | |
149 | leaveloop, last, next, redo etc. Sometimes they are rather cryptic | |
150 | like gv2cv, i_ncmp and ftsvtx. | |
151 | ||
152 | =item an operator tag name (optag) | |
153 | ||
154 | Operator tags can be used to refer to groups (or sets) of operators. | |
7b8d334a | 155 | Tag names always begin with a colon. The Opcode module defines several |
6badd1a5 | 156 | optags and the user can define others using the define_optag function. |
157 | ||
158 | =item a negated opname or optag | |
159 | ||
160 | An opname or optag can be prefixed with an exclamation mark, e.g., !mkdir. | |
161 | Negating an opname or optag means remove the corresponding ops from the | |
162 | accumulated set of ops at that point. | |
163 | ||
164 | =item an operator set (opset) | |
165 | ||
7c011d3a | 166 | An I<opset> as a binary string of approximately 44 bytes which holds a |
6badd1a5 | 167 | set or zero or more operators. |
168 | ||
169 | The opset and opset_to_ops functions can be used to convert from | |
170 | a list of operators to an opset and I<vice versa>. | |
171 | ||
172 | Wherever a list of operators can be given you can use one or more opsets. | |
173 | See also Manipulating Opsets below. | |
174 | ||
175 | =back | |
176 | ||
177 | ||
178 | =head1 Opcode Functions | |
179 | ||
180 | The Opcode package contains functions for manipulating operator names | |
181 | tags and sets. All are available for export by the package. | |
182 | ||
183 | =over 8 | |
184 | ||
185 | =item opcodes | |
186 | ||
187 | In a scalar context opcodes returns the number of opcodes in this | |
7c011d3a | 188 | version of perl (around 350 for perl-5.7.0). |
6badd1a5 | 189 | |
190 | In a list context it returns a list of all the operator names. | |
191 | (Not yet implemented, use @names = opset_to_ops(full_opset).) | |
192 | ||
193 | =item opset (OP, ...) | |
194 | ||
195 | Returns an opset containing the listed operators. | |
196 | ||
197 | =item opset_to_ops (OPSET) | |
198 | ||
199 | Returns a list of operator names corresponding to those operators in | |
200 | the set. | |
201 | ||
202 | =item opset_to_hex (OPSET) | |
203 | ||
204 | Returns a string representation of an opset. Can be handy for debugging. | |
205 | ||
206 | =item full_opset | |
207 | ||
208 | Returns an opset which includes all operators. | |
209 | ||
210 | =item empty_opset | |
211 | ||
212 | Returns an opset which contains no operators. | |
213 | ||
214 | =item invert_opset (OPSET) | |
215 | ||
216 | Returns an opset which is the inverse set of the one supplied. | |
217 | ||
218 | =item verify_opset (OPSET, ...) | |
219 | ||
220 | Returns true if the supplied opset looks like a valid opset (is the | |
221 | right length etc) otherwise it returns false. If an optional second | |
222 | parameter is true then verify_opset will croak on an invalid opset | |
223 | instead of returning false. | |
224 | ||
225 | Most of the other Opcode functions call verify_opset automatically | |
226 | and will croak if given an invalid opset. | |
227 | ||
228 | =item define_optag (OPTAG, OPSET) | |
229 | ||
230 | Define OPTAG as a symbolic name for OPSET. Optag names always start | |
231 | with a colon C<:>. | |
232 | ||
233 | The optag name used must not be defined already (define_optag will | |
234 | croak if it is already defined). Optag names are global to the perl | |
235 | process and optag definitions cannot be altered or deleted once | |
236 | defined. | |
237 | ||
238 | It is strongly recommended that applications using Opcode should use a | |
239 | leading capital letter on their tag names since lowercase names are | |
240 | reserved for use by the Opcode module. If using Opcode within a module | |
241 | you should prefix your tags names with the name of your module to | |
242 | ensure uniqueness and thus avoid clashes with other modules. | |
243 | ||
244 | =item opmask_add (OPSET) | |
245 | ||
246 | Adds the supplied opset to the current opmask. Note that there is | |
247 | currently I<no> mechanism for unmasking ops once they have been masked. | |
248 | This is intentional. | |
249 | ||
250 | =item opmask | |
251 | ||
252 | Returns an opset corresponding to the current opmask. | |
253 | ||
254 | =item opdesc (OP, ...) | |
255 | ||
256 | This takes a list of operator names and returns the corresponding list | |
257 | of operator descriptions. | |
258 | ||
259 | =item opdump (PAT) | |
260 | ||
261 | Dumps to STDOUT a two column list of op names and op descriptions. | |
262 | If an optional pattern is given then only lines which match the | |
263 | (case insensitive) pattern will be output. | |
264 | ||
265 | It's designed to be used as a handy command line utility: | |
266 | ||
267 | perl -MOpcode=opdump -e opdump | |
268 | perl -MOpcode=opdump -e 'opdump Eval' | |
269 | ||
270 | =back | |
271 | ||
272 | =head1 Manipulating Opsets | |
273 | ||
274 | Opsets may be manipulated using the perl bit vector operators & (and), | (or), | |
275 | ^ (xor) and ~ (negate/invert). | |
276 | ||
277 | However you should never rely on the numerical position of any opcode | |
278 | within the opset. In other words both sides of a bit vector operator | |
279 | should be opsets returned from Opcode functions. | |
280 | ||
281 | Also, since the number of opcodes in your current version of perl might | |
282 | not be an exact multiple of eight, there may be unused bits in the last | |
283 | byte of an upset. This should not cause any problems (Opcode functions | |
284 | ignore those extra bits) but it does mean that using the ~ operator | |
285 | will typically not produce the same 'physical' opset 'string' as the | |
286 | invert_opset function. | |
287 | ||
288 | ||
289 | =head1 TO DO (maybe) | |
290 | ||
be1d34d7 FC |
291 | $bool = opset_eq($opset1, $opset2) true if opsets are logically |
292 | equivalent | |
6badd1a5 | 293 | $yes = opset_can($opset, @ops) true if $opset has all @ops set |
294 | ||
295 | @diff = opset_diff($opset1, $opset2) => ('foo', '!bar', ...) | |
296 | ||
297 | =cut | |
298 | ||
299 | # the =cut above is used by _init_optags() to get here quickly | |
300 | ||
301 | =head1 Predefined Opcode Tags | |
302 | ||
303 | =over 5 | |
304 | ||
305 | =item :base_core | |
306 | ||
307 | null stub scalar pushmark wantarray const defined undef | |
308 | ||
309 | rv2sv sassign | |
310 | ||
93bad3fd | 311 | rv2av aassign aelem aelemfast aelemfast_lex aslice av2arylen |
6badd1a5 | 312 | |
be1d34d7 FC |
313 | rv2hv helem hslice each values keys exists delete aeach akeys |
314 | avalues reach rvalues rkeys | |
6badd1a5 | 315 | |
be1d34d7 FC |
316 | preinc i_preinc predec i_predec postinc i_postinc |
317 | postdec i_postdec int hex oct abs pow multiply i_multiply | |
318 | divide i_divide modulo i_modulo add i_add subtract i_subtract | |
6badd1a5 | 319 | |
320 | left_shift right_shift bit_and bit_xor bit_or negate i_negate | |
321 | not complement | |
322 | ||
323 | lt i_lt gt i_gt le i_le ge i_ge eq i_eq ne i_ne ncmp i_ncmp | |
324 | slt sgt sle sge seq sne scmp | |
325 | ||
326 | substr vec stringify study pos length index rindex ord chr | |
327 | ||
be1d34d7 FC |
328 | ucfirst lcfirst uc lc fc quotemeta trans transr chop schop |
329 | chomp schomp | |
6badd1a5 | 330 | |
8782bef2 | 331 | match split qr |
6badd1a5 | 332 | |
333 | list lslice splice push pop shift unshift reverse | |
334 | ||
c963b151 | 335 | cond_expr flip flop andassign orassign dorassign and or dor xor |
6badd1a5 | 336 | |
5edb5b2a | 337 | warn die lineseq nextstate scope enter leave |
6badd1a5 | 338 | |
deb8a388 | 339 | rv2cv anoncode prototype coreargs |
6badd1a5 | 340 | |
be1d34d7 FC |
341 | entersub leavesub leavesublv return method method_named |
342 | -- XXX loops via recursion? | |
6badd1a5 | 343 | |
be1d34d7 FC |
344 | leaveeval -- needed for Safe to operate, is safe |
345 | without entereval | |
6badd1a5 | 346 | |
347 | =item :base_mem | |
348 | ||
349 | These memory related ops are not included in :base_core because they | |
350 | can easily be used to implement a resource attack (e.g., consume all | |
351 | available memory). | |
352 | ||
353 | concat repeat join range | |
354 | ||
355 | anonlist anonhash | |
356 | ||
3c4b39be | 357 | Note that despite the existence of this optag a memory resource attack |
6badd1a5 | 358 | may still be possible using only :base_core ops. |
359 | ||
360 | Disabling these ops is a I<very> heavy handed way to attempt to prevent | |
361 | a memory resource attack. It's probable that a specific memory limit | |
362 | mechanism will be added to perl in the near future. | |
363 | ||
364 | =item :base_loop | |
365 | ||
366 | These loop ops are not included in :base_core because they can easily be | |
367 | used to implement a resource attack (e.g., consume all available CPU time). | |
368 | ||
369 | grepstart grepwhile | |
370 | mapstart mapwhile | |
371 | enteriter iter | |
e897d888 | 372 | enterloop leaveloop unstack |
6badd1a5 | 373 | last next redo |
374 | goto | |
375 | ||
376 | =item :base_io | |
377 | ||
378 | These ops enable I<filehandle> (rather than filename) based input and | |
379 | output. These are safe on the assumption that only pre-existing | |
e866b74b RGS |
380 | filehandles are available for use. Usually, to create new filehandles |
381 | other ops such as open would need to be enabled, if you don't take into | |
382 | account the magical open of ARGV. | |
6badd1a5 | 383 | |
384 | readline rcatline getc read | |
385 | ||
386 | formline enterwrite leavewrite | |
387 | ||
0d863452 | 388 | print say sysread syswrite send recv |
96e4d5b1 | 389 | |
8903cb82 | 390 | eof tell seek sysseek |
6badd1a5 | 391 | |
392 | readdir telldir seekdir rewinddir | |
393 | ||
394 | =item :base_orig | |
395 | ||
396 | These are a hotchpotch of opcodes still waiting to be considered | |
397 | ||
398 | gvsv gv gelem | |
399 | ||
a7fd8ef6 | 400 | padsv padav padhv padcv padany padrange introcv clonecv |
6badd1a5 | 401 | |
87fc0556 NC |
402 | once |
403 | ||
6badd1a5 | 404 | rv2gv refgen srefgen ref |
405 | ||
be1d34d7 FC |
406 | bless -- could be used to change ownership of objects |
407 | (reblessing) | |
6badd1a5 | 408 | |
2cd61cdb | 409 | pushre regcmaybe regcreset regcomp subst substcont |
6badd1a5 | 410 | |
411 | sprintf prtf -- can core dump | |
412 | ||
413 | crypt | |
414 | ||
415 | tie untie | |
416 | ||
417 | dbmopen dbmclose | |
418 | sselect select | |
419 | pipe_op sockpair | |
420 | ||
be1d34d7 FC |
421 | getppid getpgrp setpgrp getpriority setpriority |
422 | localtime gmtime | |
6badd1a5 | 423 | |
424 | entertry leavetry -- can be used to 'hide' fatal errors | |
425 | ||
0d863452 RH |
426 | entergiven leavegiven |
427 | enterwhen leavewhen | |
428 | break continue | |
429 | smartmatch | |
430 | ||
53e06cf0 SC |
431 | custom -- where should this go |
432 | ||
6badd1a5 | 433 | =item :base_math |
434 | ||
435 | These ops are not included in :base_core because of the risk of them being | |
436 | used to generate floating point exceptions (which would have to be caught | |
437 | using a $SIG{FPE} handler). | |
438 | ||
439 | atan2 sin cos exp log sqrt | |
440 | ||
441 | These ops are not included in :base_core because they have an effect | |
442 | beyond the scope of the compartment. | |
443 | ||
444 | rand srand | |
445 | ||
1f5895a1 MB |
446 | =item :base_thread |
447 | ||
554b3eca | 448 | These ops are related to multi-threading. |
1f5895a1 | 449 | |
5b9081af | 450 | lock |
1f5895a1 | 451 | |
6badd1a5 | 452 | =item :default |
453 | ||
454 | A handy tag name for a I<reasonable> default set of ops. (The current ops | |
455 | allowed are unstable while development continues. It will change.) | |
456 | ||
e866b74b RGS |
457 | :base_core :base_mem :base_loop :base_orig :base_thread |
458 | ||
459 | This list used to contain :base_io prior to Opcode 1.07. | |
6badd1a5 | 460 | |
461 | If safety matters to you (and why else would you be using the Opcode module?) | |
462 | then you should not rely on the definition of this, or indeed any other, optag! | |
463 | ||
6badd1a5 | 464 | =item :filesys_read |
465 | ||
466 | stat lstat readlink | |
467 | ||
be1d34d7 FC |
468 | ftatime ftblk ftchr ftctime ftdir fteexec fteowned |
469 | fteread ftewrite ftfile ftis ftlink ftmtime ftpipe | |
470 | ftrexec ftrowned ftrread ftsgid ftsize ftsock ftsuid | |
471 | fttty ftzero ftrwrite ftsvtx | |
6badd1a5 | 472 | |
473 | fttext ftbinary | |
474 | ||
475 | fileno | |
476 | ||
477 | =item :sys_db | |
478 | ||
479 | ghbyname ghbyaddr ghostent shostent ehostent -- hosts | |
480 | gnbyname gnbyaddr gnetent snetent enetent -- networks | |
481 | gpbyname gpbynumber gprotoent sprotoent eprotoent -- protocols | |
482 | gsbyname gsbyport gservent sservent eservent -- services | |
483 | ||
484 | gpwnam gpwuid gpwent spwent epwent getlogin -- users | |
485 | ggrnam ggrgid ggrent sgrent egrent -- groups | |
486 | ||
487 | =item :browse | |
488 | ||
489 | A handy tag name for a I<reasonable> default set of ops beyond the | |
490 | :default optag. Like :default (and indeed all the other optags) its | |
491 | current definition is unstable while development continues. It will change. | |
492 | ||
493 | The :browse tag represents the next step beyond :default. It it a | |
494 | superset of the :default ops and adds :filesys_read the :sys_db. | |
495 | The intent being that scripts can access more (possibly sensitive) | |
496 | information about your system but not be able to change it. | |
497 | ||
498 | :default :filesys_read :sys_db | |
499 | ||
500 | =item :filesys_open | |
501 | ||
502 | sysopen open close | |
503 | umask binmode | |
504 | ||
505 | open_dir closedir -- other dir ops are in :base_io | |
506 | ||
507 | =item :filesys_write | |
508 | ||
509 | link unlink rename symlink truncate | |
510 | ||
511 | mkdir rmdir | |
512 | ||
513 | utime chmod chown | |
514 | ||
be1d34d7 FC |
515 | fcntl -- not strictly filesys related, but possibly as |
516 | dangerous? | |
6badd1a5 | 517 | |
518 | =item :subprocess | |
519 | ||
520 | backtick system | |
521 | ||
522 | fork | |
523 | ||
524 | wait waitpid | |
525 | ||
f812a825 | 526 | glob -- access to Cshell via <`rm *`> |
527 | ||
6badd1a5 | 528 | =item :ownprocess |
529 | ||
530 | exec exit kill | |
531 | ||
532 | time tms -- could be used for timing attacks (paranoid?) | |
533 | ||
534 | =item :others | |
535 | ||
536 | This tag holds groups of assorted specialist opcodes that don't warrant | |
537 | having optags defined for them. | |
538 | ||
539 | SystemV Interprocess Communications: | |
540 | ||
541 | msgctl msgget msgrcv msgsnd | |
542 | ||
543 | semctl semget semop | |
544 | ||
545 | shmctl shmget shmread shmwrite | |
546 | ||
6e8b06a8 RGS |
547 | =item :load |
548 | ||
549 | This tag holds opcodes related to loading modules and getting information | |
550 | about calling environment and args. | |
551 | ||
552 | require dofile | |
84ed0108 | 553 | caller runcv |
6e8b06a8 | 554 | |
6badd1a5 | 555 | =item :still_to_be_decided |
556 | ||
557 | chdir | |
558 | flock ioctl | |
559 | ||
560 | socket getpeername ssockopt | |
561 | bind connect listen accept shutdown gsockopt getsockname | |
562 | ||
563 | sleep alarm -- changes global timer state and signal handling | |
564 | sort -- assorted problems including core dumps | |
565 | tied -- can be used to access object implementing a tie | |
566 | pack unpack -- can be used to create/use memory pointers | |
567 | ||
996c9baa VP |
568 | hintseval -- constant op holding eval hints |
569 | ||
6badd1a5 | 570 | entereval -- can be used to hide code from initial compile |
6badd1a5 | 571 | |
572 | reset | |
573 | ||
574 | dbstate -- perl -d version of nextstate(ment) opcode | |
575 | ||
576 | =item :dangerous | |
577 | ||
578 | This tag is simply a bucket for opcodes that are unlikely to be used via | |
3c4b39be | 579 | a tag name but need to be tagged for completeness and documentation. |
6badd1a5 | 580 | |
581 | syscall dump chroot | |
582 | ||
6badd1a5 | 583 | =back |
584 | ||
585 | =head1 SEE ALSO | |
586 | ||
86780939 | 587 | L<ops> -- perl pragma interface to Opcode module. |
6badd1a5 | 588 | |
86780939 | 589 | L<Safe> -- Opcode and namespace limited execution compartments |
6badd1a5 | 590 | |
591 | =head1 AUTHORS | |
592 | ||
593 | Originally designed and implemented by Malcolm Beattie, | |
594 | mbeattie@sable.ox.ac.uk as part of Safe version 1. | |
595 | ||
596 | Split out from Safe module version 1, named opcode tags and other | |
7b8d334a | 597 | changes added by Tim Bunce. |
6badd1a5 | 598 | |
599 | =cut | |
600 |