Monday, 5 September 2016

regex

1. examples
  • regexr.com                 //playground
  • /pattern/g                   //global
  • /pattern/                     //1st occurrence
  • [enl]                           //individual match of e n l
  • [^enl]                         //individual match except e n l
  • [a-z]
  • [A-Z]
  • [0-9]
  • .                                  //everything except newline
  • \n                                //new line
  • \w                               //word char (letters, digits and underscore), 1 char is 1 match
  • \W                              //non word char
  • \d                                //digit, 1 digit is 1 match
  • \D                               //non digit
  • \s                                //white space
  • \S                               //non white space
  • [\s\S]                          //everything including newline
  • ^i                               //i as 1st char of the input (1st line only)
  • /^i/m                          //i as 1st char of any line (multiline flag)
  • \.                                 //period
  • (old)                           //capture group that matches 'old'
  • (?:old)                        //non-capture group that matches 'old'
  • \1 or $1                      //1st capture group
  • g(?=old)                     //look ahead, 'g' that's followed by 'old'
  • g(?!old)                      //negative look ahead, 'g' that's not followed by 'old'
  • +                                 //one or more of a pattern
  • *                                 //zero or more of a pattern
  • ?                                 //zero or one of a pattern
  • {3}                             //3 copies of a pattern
  • {3,}                            //3 or more copies of a pattern
  • {3,4}                          //3 or 4 copies of a pattern


greedy vs lazy
greedy:
<.+>                                              //matches <em>Hello World</em>
\d+                                                //matches 12345

lazy
<.+?>                                            //? after + tells it to repeat as few as possible and matches <em>
\d+?                                              //matches 1

special character
  • ^ $                                       //begin end of line
  • \< \>                                    //begin end of word in vim
  • \                                          //give special meaning to next character
  • ?                                         //change preceding quantifier to lazy
  • /                                          //regex delimiter, mark the beginning and end of regex
common pattern
  • \b                                        //word boundary
  •  .|\n                                                       //any character or new line
  • <tag[^>]*>((.|\n)*?)<\/tag>                 //html tag
  • :%s/\([a-zA-Z]*\):/"\1":/g                    //search and replace throughout the whole file (wraps the word before each ':' in double quotes)
  • :%s/'/"/g
assertion
  • is a condition that needs to be met for preceding or following characters, but is NOT part of the regex match result

in java
  • \\                                         //\ is also the escape character in java string literals, so it must be doubled, e.g. "\\d"

online playground: https://regex101.com/
best tutorial: i-wanna-use-regex-but-what-does-it-all-mean


vi search and replace example
  • :%s/old/new/g                           //all lines
  • :s/old/new/gi                             //current line, ignore case
  • :s/\<old\>/new/                         //old is a whole word
  • :%s:/g:abc:g                              //pattern contains '/', so use another char as the delimiter, in this case ':'
  • :g/^baz/s/foo/bar/g                   //change in each line starting with 'baz'
when searching in vim (default 'magic' mode):

., *, \, [, ^, and $ are metacharacters.
+, ?, |, &, {, (, and ) must be escaped to use their special function


reference
1. best regex website
2. good article explaining regex in javascript 
3. java regular expression
4. rexegg tutorial
5. greedy and lazy match
6. difference between :g and :%s commands in vim
7. regex online
8. i-wanna-use-regex-but-what-does-it-all-mean

No comments:

Post a Comment