After some googling and then submitting a question to the re2-dev Google group, I couldn't find any useful help on using "named groups" in RE2.
For example, if you want to extract the protocol, URL path and query string parts from a given URL string, it's a good idea to use named groups in RE2:
Sample URL:
http://localhost:8080/hello?name=Hassan
A sample (but not perfect) regex might be:
^(?P<PROTO>https?)://(?P<URL>.+)\?(?P<QUERY>.+)?$
The best way to get the PROTO, URL and QUERY values is to use the named-group capturing feature of RE2.
This is the code I wrote for this:
For example, if you want to extract the protocol, URL path and query string parts from a given URL string, it's a good idea to use named groups in RE2:
Sample URL:
http://localhost:8080/hello?name=Hassan
A sample (but not perfect) regex might be:
^(?P<PROTO>https?)://(?P<URL>.+)\?(?P<QUERY>.+)?$
The best way to get the PROTO, URL and QUERY values is to use the named-group capturing feature of RE2.
This is the code I wrote for this:
x
bool FindNamedGroups(const std::string &p_regex,const std::string &p_text,std::map<std::string,std::string> *p_group_value)
{
p_group_value->clear();
RE2 rx(p_regex);
if(!rx.ok())
{
std::cerr << "Invalid Regular Expression :" << p_regex << std::endl;
return false;
}
size_t named_grp_size =rx.NumberOfCapturingGroups();
if(named_grp_size>10)
{
std::cerr << "No support for more than 10 named groups :" << named_grp_size<< std::endl;
return false;
}
const std::map<std::string,int> &grp_to_idx=rx.NamedCapturingGroups();
RE2::Arg args[10];
std::string vars[10];
const RE2::Arg * const p_args[10]={&args[0],&args[1],&args[2],&args[3],&args[4],&args[5],&args[6],&args[7],&args[8],&args[9]};
int var_count=0;
for(var_count=0;var_count<10;var_count++)
args[var_count]=&vars[var_count];
re2::StringPiece sp_input(p_text);
//after running following function. matched groups value will be stored in p_args which point to args which point to vars!
bool found= RE2::FindAndConsumeN(&sp_input,rx,p_args,named_grp_size);
if(!found)
{
return false ;
}
std::map<std::string,int>::const_iterator iter_grps=grp_to_idx.cbegin();
for(;iter_grps!=grp_to_idx.cend();++iter_grps)
{
(*p_group_value)[iter_grps->first]=vars[iter_grps->second-1];
}
return true;
}
//////////// USAGE ////////////////
FindNamedGroups("^(?P<PROTO>https?)://(?P<URL>.+)\\?(?P<QUERY>.+)?$","http://localhost:8080/hello?name=Hassan",&g_v);
iter=g_v.cbegin();
for(;iter!=g_v.cend();++iter)
std::cout << iter->first << " = " << iter->second << std::endl;
x
No comments :
Post a Comment