Commit | Line | Data |
---|---|---|
be96f5c3 S |
1 | package Parse::CPAN::Meta; |
2 | ||
3 | use strict; | |
4 | use Carp 'croak'; | |
5 | BEGIN { | |
6 | require 5.004; | |
7 | require Exporter; | |
7cc1b246 | 8 | $Parse::CPAN::Meta::VERSION = '0.05'; |
be96f5c3 S |
9 | @Parse::CPAN::Meta::ISA = qw{ Exporter }; |
10 | @Parse::CPAN::Meta::EXPORT_OK = qw{ Load LoadFile }; | |
11 | } | |
12 | ||
13 | # Prototypes | |
14 | sub LoadFile ($); | |
15 | sub Load ($); | |
16 | sub _scalar ($$$); | |
17 | sub _array ($$$); | |
18 | sub _hash ($$$); | |
19 | ||
20 | # Printable characters for escapes | |
21 | my %UNESCAPES = ( | |
22 | z => "\x00", a => "\x07", t => "\x09", | |
23 | n => "\x0a", v => "\x0b", f => "\x0c", | |
24 | r => "\x0d", e => "\x1b", '\\' => '\\', | |
25 | ); | |
26 | ||
27 | ||
c59d1bfa S |
28 | my %BOM = ( |
29 | "\357\273\277" => 'UTF-8', | |
30 | "\376\377" => 'UTF-16BE', | |
31 | "\377\376" => 'UTF-16LE', | |
32 | "\0\0\376\377" => 'UTF-32BE', | |
33 | "\377\376\0\0" => 'UTF-32LE' | |
34 | ); | |
35 | ||
36 | sub BOM_MIN_LENGTH () { 2 } | |
37 | sub BOM_MAX_LENGTH () { 4 } | |
38 | sub HAVE_UTF8 () { $] >= 5.007003 } | |
39 | ||
40 | BEGIN { require utf8 if HAVE_UTF8 } | |
be96f5c3 S |
41 | |
42 | ||
43 | ##################################################################### | |
44 | # Implementation | |
45 | ||
46 | # Create an object from a file | |
47 | sub LoadFile ($) { | |
48 | # Check the file | |
49 | my $file = shift; | |
50 | croak('You did not specify a file name') unless $file; | |
51 | croak( "File '$file' does not exist" ) unless -e $file; | |
52 | croak( "'$file' is a directory, not a file" ) unless -f _; | |
53 | croak( "Insufficient permissions to read '$file'" ) unless -r _; | |
54 | ||
55 | # Slurp in the file | |
56 | local $/ = undef; | |
57 | open( CFG, $file ) or croak("Failed to open file '$file': $!"); | |
58 | my $yaml = <CFG>; | |
59 | close CFG or croak("Failed to close file '$file': $!"); | |
60 | ||
61 | # Hand off to the actual parser | |
62 | Load( $yaml ); | |
63 | } | |
64 | ||
65 | # Parse a document from a string. | |
66 | # Doing checks on $_[0] prevents us having to do a string copy. | |
67 | sub Load ($) { | |
c59d1bfa S |
68 | |
69 | my $str = $_[0]; | |
70 | ||
71 | # Handle special cases | |
72 | foreach my $length ( BOM_MIN_LENGTH .. BOM_MAX_LENGTH ) { | |
73 | if ( my $enc = $BOM{substr($str, 0, $length)} ) { | |
74 | croak("Stream has a non UTF-8 BOM") unless $enc eq 'UTF-8'; | |
75 | substr($str, 0, $length) = ''; # strip UTF-8 bom if found, we'll just ignore it | |
76 | } | |
77 | } | |
78 | ||
79 | if ( HAVE_UTF8 ) { | |
80 | utf8::decode($str); # try to decode as utf8 | |
81 | } | |
82 | ||
83 | unless ( defined $str ) { | |
be96f5c3 S |
84 | croak("Did not provide a string to Load"); |
85 | } | |
c59d1bfa S |
86 | return() unless length $str; |
87 | unless ( $str =~ /[\012\015]+$/ ) { | |
be96f5c3 S |
88 | croak("Stream does not end with newline character"); |
89 | } | |
90 | ||
91 | # Split the file into lines | |
92 | my @lines = grep { ! /^\s*(?:\#.*)?$/ } | |
c59d1bfa | 93 | split /(?:\015{1,2}\012|\015|\012)/, $str; |
be96f5c3 S |
94 | |
95 | # A nibbling parser | |
96 | my @documents = (); | |
97 | while ( @lines ) { | |
98 | # Do we have a document header? | |
99 | if ( $lines[0] =~ /^---\s*(?:(.+)\s*)?$/ ) { | |
100 | # Handle scalar documents | |
101 | shift @lines; | |
102 | if ( defined $1 and $1 !~ /^(?:\#.+|\%YAML:[\d\.]+)$/ ) { | |
103 | push @documents, _scalar( "$1", [ undef ], \@lines ); | |
104 | next; | |
105 | } | |
106 | } | |
107 | ||
108 | if ( ! @lines or $lines[0] =~ /^---\s*(?:(.+)\s*)?$/ ) { | |
109 | # A naked document | |
110 | push @documents, undef; | |
111 | ||
112 | } elsif ( $lines[0] =~ /^\s*\-/ ) { | |
113 | # An array at the root | |
114 | my $document = [ ]; | |
115 | push @documents, $document; | |
116 | _array( $document, [ 0 ], \@lines ); | |
117 | ||
118 | } elsif ( $lines[0] =~ /^(\s*)\w/ ) { | |
119 | # A hash at the root | |
120 | my $document = { }; | |
121 | push @documents, $document; | |
122 | _hash( $document, [ length($1) ], \@lines ); | |
123 | ||
124 | } else { | |
125 | croak("Parse::CPAN::Meta does not support the line '$lines[0]'"); | |
126 | } | |
127 | } | |
128 | ||
129 | if ( wantarray ) { | |
130 | return @documents; | |
131 | } else { | |
132 | return $documents[-1]; | |
133 | } | |
134 | } | |
135 | ||
136 | # Deparse a scalar string to the actual scalar | |
137 | sub _scalar ($$$) { | |
138 | my $string = shift; | |
139 | my $indent = shift; | |
140 | my $lines = shift; | |
141 | ||
142 | # Trim trailing whitespace | |
143 | $string =~ s/\s*$//; | |
144 | ||
145 | # Explitic null/undef | |
146 | return undef if $string eq '~'; | |
147 | ||
148 | # Quotes | |
149 | if ( $string =~ /^\'(.*?)\'$/ ) { | |
150 | return '' unless defined $1; | |
151 | my $rv = $1; | |
152 | $rv =~ s/\'\'/\'/g; | |
153 | return $rv; | |
154 | } | |
155 | if ( $string =~ /^\"((?:\\.|[^\"])*)\"$/ ) { | |
156 | my $str = $1; | |
157 | $str =~ s/\\"/"/g; | |
158 | $str =~ s/\\([never\\fartz]|x([0-9a-fA-F]{2}))/(length($1)>1)?pack("H2",$2):$UNESCAPES{$1}/gex; | |
159 | return $str; | |
160 | } | |
161 | if ( $string =~ /^[\'\"]/ ) { | |
162 | # A quote with folding... we don't support that | |
163 | croak("Parse::CPAN::Meta does not support multi-line quoted scalars"); | |
164 | } | |
165 | ||
166 | # Null hash and array | |
167 | if ( $string eq '{}' ) { | |
168 | # Null hash | |
169 | return {}; | |
170 | } | |
171 | if ( $string eq '[]' ) { | |
172 | # Null array | |
173 | return []; | |
174 | } | |
175 | ||
176 | # Regular unquoted string | |
177 | return $string unless $string =~ /^[>|]/; | |
178 | ||
179 | # Error | |
180 | croak("Multi-line scalar content missing") unless @$lines; | |
181 | ||
182 | # Check the indent depth | |
183 | $lines->[0] =~ /^(\s*)/; | |
184 | $indent->[-1] = length("$1"); | |
185 | if ( defined $indent->[-2] and $indent->[-1] <= $indent->[-2] ) { | |
186 | croak("Illegal line indenting"); | |
187 | } | |
188 | ||
189 | # Pull the lines | |
190 | my @multiline = (); | |
191 | while ( @$lines ) { | |
192 | $lines->[0] =~ /^(\s*)/; | |
193 | last unless length($1) >= $indent->[-1]; | |
194 | push @multiline, substr(shift(@$lines), length($1)); | |
195 | } | |
196 | ||
197 | my $j = (substr($string, 0, 1) eq '>') ? ' ' : "\n"; | |
198 | my $t = (substr($string, 1, 1) eq '-') ? '' : "\n"; | |
199 | return join( $j, @multiline ) . $t; | |
200 | } | |
201 | ||
202 | # Parse an array | |
203 | sub _array ($$$) { | |
204 | my $array = shift; | |
205 | my $indent = shift; | |
206 | my $lines = shift; | |
207 | ||
208 | while ( @$lines ) { | |
209 | # Check for a new document | |
210 | return 1 if $lines->[0] =~ /^---\s*(?:(.+)\s*)?$/; | |
211 | ||
212 | # Check the indent level | |
213 | $lines->[0] =~ /^(\s*)/; | |
214 | if ( length($1) < $indent->[-1] ) { | |
215 | return 1; | |
216 | } elsif ( length($1) > $indent->[-1] ) { | |
217 | croak("Hash line over-indented"); | |
218 | } | |
219 | ||
220 | if ( $lines->[0] =~ /^(\s*\-\s+)[^\'\"]\S*\s*:(?:\s+|$)/ ) { | |
221 | # Inline nested hash | |
222 | my $indent2 = length("$1"); | |
223 | $lines->[0] =~ s/-/ /; | |
224 | push @$array, { }; | |
225 | _hash( $array->[-1], [ @$indent, $indent2 ], $lines ); | |
226 | ||
227 | } elsif ( $lines->[0] =~ /^\s*\-(\s*)(.+?)\s*$/ ) { | |
228 | # Array entry with a value | |
229 | shift @$lines; | |
230 | push @$array, _scalar( "$2", [ @$indent, undef ], $lines ); | |
231 | ||
232 | } elsif ( $lines->[0] =~ /^\s*\-\s*$/ ) { | |
233 | shift @$lines; | |
234 | unless ( @$lines ) { | |
235 | push @$array, undef; | |
236 | return 1; | |
237 | } | |
238 | if ( $lines->[0] =~ /^(\s*)\-/ ) { | |
239 | my $indent2 = length("$1"); | |
240 | if ( $indent->[-1] == $indent2 ) { | |
241 | # Null array entry | |
242 | push @$array, undef; | |
243 | } else { | |
244 | # Naked indenter | |
245 | push @$array, [ ]; | |
246 | _array( $array->[-1], [ @$indent, $indent2 ], $lines ); | |
247 | } | |
248 | ||
249 | } elsif ( $lines->[0] =~ /^(\s*)\w/ ) { | |
250 | push @$array, { }; | |
251 | _hash( $array->[-1], [ @$indent, length("$1") ], $lines ); | |
252 | ||
253 | } else { | |
254 | croak("Parse::CPAN::Meta does not support the line '$lines->[0]'"); | |
255 | } | |
256 | ||
257 | } elsif ( defined $indent->[-2] and $indent->[-1] == $indent->[-2] ) { | |
258 | # This is probably a structure like the following... | |
259 | # --- | |
260 | # foo: | |
261 | # - list | |
262 | # bar: value | |
263 | # | |
264 | # ... so lets return and let the hash parser handle it | |
265 | return 1; | |
266 | ||
267 | } else { | |
268 | croak("Parse::CPAN::Meta does not support the line '$lines->[0]'"); | |
269 | } | |
270 | } | |
271 | ||
272 | return 1; | |
273 | } | |
274 | ||
275 | # Parse an array | |
276 | sub _hash ($$$) { | |
277 | my $hash = shift; | |
278 | my $indent = shift; | |
279 | my $lines = shift; | |
280 | ||
281 | while ( @$lines ) { | |
282 | # Check for a new document | |
283 | return 1 if $lines->[0] =~ /^---\s*(?:(.+)\s*)?$/; | |
284 | ||
285 | # Check the indent level | |
286 | $lines->[0] =~/^(\s*)/; | |
287 | if ( length($1) < $indent->[-1] ) { | |
288 | return 1; | |
289 | } elsif ( length($1) > $indent->[-1] ) { | |
290 | croak("Hash line over-indented"); | |
291 | } | |
292 | ||
293 | # Get the key | |
294 | unless ( $lines->[0] =~ s/^\s*([^\'\"][^\n]*?)\s*:(\s+|$)// ) { | |
295 | croak("Bad hash line"); | |
296 | } | |
297 | my $key = $1; | |
298 | ||
299 | # Do we have a value? | |
300 | if ( length $lines->[0] ) { | |
301 | # Yes | |
302 | $hash->{$key} = _scalar( shift(@$lines), [ @$indent, undef ], $lines ); | |
303 | next; | |
304 | } | |
305 | ||
306 | # An indent | |
307 | shift @$lines; | |
308 | unless ( @$lines ) { | |
309 | $hash->{$key} = undef; | |
310 | return 1; | |
311 | } | |
312 | if ( $lines->[0] =~ /^(\s*)-/ ) { | |
313 | $hash->{$key} = []; | |
314 | _array( $hash->{$key}, [ @$indent, length($1) ], $lines ); | |
315 | } elsif ( $lines->[0] =~ /^(\s*)./ ) { | |
316 | my $indent2 = length("$1"); | |
317 | if ( $indent->[-1] >= $indent2 ) { | |
318 | # Null hash entry | |
319 | $hash->{$key} = undef; | |
320 | } else { | |
321 | $hash->{$key} = {}; | |
322 | _hash( $hash->{$key}, [ @$indent, length($1) ], $lines ); | |
323 | } | |
324 | } | |
325 | } | |
326 | ||
327 | return 1; | |
328 | } | |
329 | ||
330 | 1; | |
331 | ||
332 | __END__ | |
333 | ||
334 | =pod | |
335 | ||
336 | =head1 NAME | |
337 | ||
338 | Parse::CPAN::Meta - Parse META.yml and other similar CPAN metadata files | |
339 | ||
340 | =head1 SYNOPSIS | |
341 | ||
342 | ############################################# | |
343 | # In your file | |
344 | ||
345 | --- | |
346 | rootproperty: blah | |
347 | section: | |
348 | one: two | |
349 | three: four | |
350 | Foo: Bar | |
351 | empty: ~ | |
352 | ||
353 | ||
354 | ||
355 | ############################################# | |
356 | # In your program | |
357 | ||
358 | use Parse::CPAN::Meta; | |
359 | ||
360 | # Create a YAML file | |
361 | my @yaml = Parse::CPAN::Meta::LoadFile( 'Meta.yml' ); | |
362 | ||
363 | # Reading properties | |
364 | my $root = $yaml[0]->{rootproperty}; | |
365 | my $one = $yaml[0]->{section}->{one}; | |
366 | my $Foo = $yaml[0]->{section}->{Foo}; | |
367 | ||
368 | =head1 DESCRIPTION | |
369 | ||
370 | B<Parse::CPAN::Meta> is a parser for META.yml files, based on the | |
371 | parser half of L<YAML::Tiny>. | |
372 | ||
373 | It supports a basic subset of the full YAML specification, enough to | |
374 | implement parsing of typical META.yml files, and other similarly simple | |
375 | YAML files. | |
376 | ||
377 | If you need something with more power, move up to a full YAML parser such | |
378 | as L<YAML>, L<YAML::Syck> or L<YAML::LibYAML>. | |
379 | ||
380 | Parse::CPAN::Meta provides a very simply API of only two functions, based | |
381 | on the YAML functions of the same name. Wherever possible, identical | |
382 | calling semantics are used. | |
383 | ||
384 | All error reporting is done with exceptions (dieing). | |
385 | ||
386 | =head1 FUNCTIONS | |
387 | ||
388 | For maintenance clarity, no functions are exported. | |
389 | ||
390 | =head2 Load( $string ) | |
391 | ||
392 | my @documents = Load( $string ); | |
393 | ||
394 | Parses a string containing a valid YAML stream into a list of Perl data | |
395 | structures. | |
396 | ||
397 | =head2 LoadFile( $file_name ) | |
398 | ||
399 | Reads the YAML stream from a file instead of a string. | |
400 | ||
401 | =head1 SUPPORT | |
402 | ||
403 | Bugs should be reported via the CPAN bug tracker at | |
404 | ||
405 | L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Parse-CPAN-Meta> | |
406 | ||
407 | =head1 AUTHOR | |
408 | ||
409 | Adam Kennedy E<lt>adamk@cpan.orgE<gt> | |
410 | ||
411 | =head1 SEE ALSO | |
412 | ||
413 | L<YAML::Tiny>, L<YAML>, L<YAML::Syck> | |
414 | ||
415 | =head1 COPYRIGHT | |
416 | ||
417 | Copyright 2006 - 2009 Adam Kennedy. | |
418 | ||
419 | This program is free software; you can redistribute | |
420 | it and/or modify it under the same terms as Perl itself. | |
421 | ||
422 | The full text of the license can be found in the | |
423 | LICENSE file included with this module. | |
424 | ||
425 | =cut |