Filename | /home/ss5/perl5/perlbrew/perls/perl-5.22.0/lib/site_perl/5.22.0/SQL/Tokenizer.pm |
Statements | Executed 13 statements in 537µs |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 9µs | 12µs | BEGIN@3 | SQL::Tokenizer::
1 | 1 | 1 | 6µs | 6µs | BEGIN@6 | SQL::Tokenizer::
1 | 1 | 1 | 4µs | 13µs | BEGIN@8 | SQL::Tokenizer::
1 | 1 | 1 | 4µs | 5µs | BEGIN@4 | SQL::Tokenizer::
1 | 1 | 1 | 2µs | 2µs | CORE:qr (opcode) | SQL::Tokenizer::
0 | 0 | 0 | 0s | 0s | tokenize | SQL::Tokenizer::
0 | 0 | 0 | 0s | 0s | tokenize_sql | SQL::Tokenizer::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | package SQL::Tokenizer; | ||||
2 | |||||
3 | 2 | 13µs | 2 | 16µs | # spent 12µs (9+4) within SQL::Tokenizer::BEGIN@3 which was called:
# once (9µs+4µs) by SQL::SplitStatement::BEGIN@14 at line 3 # spent 12µs making 1 call to SQL::Tokenizer::BEGIN@3
# spent 4µs making 1 call to warnings::import |
4 | 2 | 10µs | 2 | 6µs | # spent 5µs (4+1000ns) within SQL::Tokenizer::BEGIN@4 which was called:
# once (4µs+1000ns) by SQL::SplitStatement::BEGIN@14 at line 4 # spent 5µs making 1 call to SQL::Tokenizer::BEGIN@4
# spent 1µs making 1 call to strict::import |
5 | |||||
6 | 2 | 23µs | 1 | 6µs | # spent 6µs within SQL::Tokenizer::BEGIN@6 which was called:
# once (6µs+0s) by SQL::SplitStatement::BEGIN@14 at line 6 # spent 6µs making 1 call to SQL::Tokenizer::BEGIN@6 |
7 | |||||
8 | 2 | 476µs | 2 | 22µs | # spent 13µs (4+9) within SQL::Tokenizer::BEGIN@8 which was called:
# once (4µs+9µs) by SQL::SplitStatement::BEGIN@14 at line 8 # spent 13µs making 1 call to SQL::Tokenizer::BEGIN@8
# spent 9µs making 1 call to Exporter::import |
9 | |||||
10 | 1 | 4µs | our @ISA = qw(Exporter); | ||
11 | |||||
12 | 1 | 300ns | our @EXPORT_OK= qw(tokenize_sql); | ||
13 | |||||
14 | 1 | 200ns | our $VERSION= '0.24'; | ||
15 | |||||
16 | 1 | 6µs | 1 | 2µs | my $re= qr{ # spent 2µs making 1 call to SQL::Tokenizer::CORE:qr |
17 | ( | ||||
18 | (?:--|\#)[\ \t\S]* # single line comments | ||||
19 | | | ||||
20 | (?:<>|<=>|>=|<=|==|=|!=|!|<<|>>|<|>|\|\||\||&&|&|-|\+|\*(?!/)|/(?!\*)|\%|~|\^|\?) | ||||
21 | # operators and tests | ||||
22 | | | ||||
23 | [\[\]\(\),;.] # punctuation (parenthesis, comma) | ||||
24 | | | ||||
25 | \'\'(?!\') # empty single quoted string | ||||
26 | | | ||||
27 | \"\"(?!\"") # empty double quoted string | ||||
28 | | | ||||
29 | "(?>(?:(?>[^"\\]+)|""|\\.)*)+" | ||||
30 | # anything inside double quotes, ungreedy | ||||
31 | | | ||||
32 | `(?>(?:(?>[^`\\]+)|``|\\.)*)+` | ||||
33 | # anything inside backticks quotes, ungreedy | ||||
34 | | | ||||
35 | '(?>(?:(?>[^'\\]+)|''|\\.)*)+' | ||||
36 | # anything inside single quotes, ungreedy. | ||||
37 | | | ||||
38 | /\*[\ \t\r\n\S]*?\*/ # C style comments | ||||
39 | | | ||||
40 | (?:[\w:@]+(?:\.(?:\w+|\*)?)*) | ||||
41 | # words, standard named placeholders, db.table.*, db.* | ||||
42 | | | ||||
43 | (?: \$_\$ | \$\d+ | \${1,2} ) | ||||
44 | # dollar expressions - eg $_$ $3 $$ | ||||
45 | | | ||||
46 | \n # newline | ||||
47 | | | ||||
48 | [\t\ ]+ # any kind of white spaces | ||||
49 | ) | ||||
50 | }smx; | ||||
51 | |||||
52 | sub tokenize_sql { | ||||
53 | my ( $query, $remove_white_tokens )= @_; | ||||
54 | |||||
55 | my @query= $query =~ m{$re}smxg; | ||||
56 | |||||
57 | if ($remove_white_tokens) { | ||||
58 | @query= grep( !/^[\s\n\r]*$/, @query ); | ||||
59 | } | ||||
60 | |||||
61 | return wantarray ? @query : \@query; | ||||
62 | } | ||||
63 | |||||
64 | sub tokenize { | ||||
65 | my $class= shift; | ||||
66 | return tokenize_sql(@_); | ||||
67 | } | ||||
68 | |||||
69 | 1 | 3µs | 1; | ||
70 | |||||
71 | =pod | ||||
72 | |||||
73 | =head1 NAME | ||||
74 | |||||
75 | SQL::Tokenizer - A simple SQL tokenizer. | ||||
76 | |||||
77 | =head1 VERSION | ||||
78 | |||||
79 | 0.20 | ||||
80 | |||||
81 | =head1 SYNOPSIS | ||||
82 | |||||
83 | use SQL::Tokenizer qw(tokenize_sql); | ||||
84 | |||||
85 | my $query= q{SELECT 1 + 1}; | ||||
86 | my @tokens= SQL::Tokenizer->tokenize($query); | ||||
87 | |||||
88 | # @tokens now contains ('SELECT', ' ', '1', ' ', '+', ' ', '1') | ||||
89 | |||||
90 | @tokens= tokenize_sql($query); # procedural interface | ||||
91 | |||||
92 | =head1 DESCRIPTION | ||||
93 | |||||
94 | SQL::Tokenizer is a simple tokenizer for SQL queries. It does not claim to be | ||||
95 | a parser or query verifier. It just creates sane tokens from a valid SQL | ||||
96 | query. | ||||
97 | |||||
98 | It supports SQL with comments like: | ||||
99 | |||||
100 | -- This query is used to insert a message into | ||||
101 | -- logs table | ||||
102 | INSERT INTO log (application, message) VALUES (?, ?) | ||||
103 | |||||
104 | Also supports C<''>, C<""> and C<\'> escaping methods, so tokenizing queries | ||||
105 | like the one below should not be a problem: | ||||
106 | |||||
107 | INSERT INTO log (application, message) | ||||
108 | VALUES ('myapp', 'Hey, this is a ''single quoted string''!') | ||||
109 | |||||
110 | =head1 API | ||||
111 | |||||
112 | =over 4 | ||||
113 | |||||
114 | =item tokenize_sql | ||||
115 | |||||
116 | use SQL::Tokenizer qw(tokenize_sql); | ||||
117 | |||||
118 | my @tokens= tokenize_sql($query); | ||||
119 | my $tokens= tokenize_sql($query); | ||||
120 | |||||
121 | $tokens= tokenize_sql( $query, $remove_white_tokens ); | ||||
122 | |||||
123 | C<tokenize_sql> can be imported to current namespace on request. It receives a | ||||
124 | SQL query, and returns an array of tokens if called in list context, or an | ||||
125 | arrayref if called in scalar context. | ||||
126 | |||||
127 | =item tokenize | ||||
128 | |||||
129 | my @tokens= SQL::Tokenizer->tokenize($query); | ||||
130 | my $tokens= SQL::Tokenizer->tokenize($query); | ||||
131 | |||||
132 | $tokens= SQL::Tokenizer->tokenize( $query, $remove_white_tokens ); | ||||
133 | |||||
134 | This is the only available class method. It receives a SQL query, and returns an | ||||
135 | array of tokens if called in list context, or an arrayref if called in scalar | ||||
136 | context. | ||||
137 | |||||
138 | If C<$remove_white_tokens> is true, white spaces only tokens will be removed from | ||||
139 | result. | ||||
140 | |||||
141 | =back | ||||
142 | |||||
143 | =head1 ACKNOWLEDGEMENTS | ||||
144 | |||||
145 | =over 4 | ||||
146 | |||||
147 | =item | ||||
148 | |||||
149 | Evan Harris, for implementing Shell comment style and SQL operators. | ||||
150 | |||||
151 | =item | ||||
152 | |||||
153 | Charlie Hills, for spotting a lot of important issues I haven't thought. | ||||
154 | |||||
155 | =item | ||||
156 | |||||
157 | Jonas Kramer, for fixing MySQL quoted strings and treating dot as punctuation character correctly. | ||||
158 | |||||
159 | =item | ||||
160 | |||||
161 | Emanuele Zeppieri, for asking to fix SQL::Tokenizer to support dollars as well. | ||||
162 | |||||
163 | =item | ||||
164 | |||||
165 | Nigel Metheringham, for extending the dollar signal support. | ||||
166 | |||||
167 | =item | ||||
168 | |||||
169 | Devin Withers, for making it not choke on CR+LF in comments. | ||||
170 | |||||
171 | =item | ||||
172 | |||||
173 | Luc Lanthier, for simplifying the regex and make it not choke on backslashes. | ||||
174 | |||||
175 | =back | ||||
176 | |||||
177 | =head1 AUTHOR | ||||
178 | |||||
179 | Copyright (c) 2007, 2008, 2009, 2010, 2011 Igor Sutton Lopes "<IZUT@cpan.org>". All rights | ||||
180 | reserved. | ||||
181 | |||||
182 | This module is free software; you can redistribute it and/or modify it under | ||||
183 | the same terms as Perl itself. | ||||
184 | |||||
185 | =cut | ||||
186 | |||||
# spent 2µs within SQL::Tokenizer::CORE:qr which was called:
# once (2µs+0s) by SQL::SplitStatement::BEGIN@14 at line 16 |