[LINUX] Regular expression in regex.h

myreg.cpp



#include <sys/types.h>
#include <regex.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>

#include <map>
#include <string>
#include <vector>

typedef std::string Str;
typedef std::map< int, Str > Mis;   // capture-group index -> captured text
typedef std::map< int, Mis > Mimis; // match index -> capture groups of that match

/**
 * Finds every non-overlapping match of a POSIX extended regular expression.
 *
 * @param input_string  Text to search.
 * @param pattern       POSIX extended regular expression (compiled with REG_EXTENDED).
 * @param result        Output. result[m][g] receives the text of capture group g of the
 *                      m-th match; g == 0 is the whole match. Groups that did not take
 *                      part in a match get no entry. Existing entries are not cleared.
 * @param back_ref_size Maximum number of groups (including group 0) recorded per match.
 *                      With 0 nothing is recorded.
 * @return 0 on success (including "no match at all"), 1 if the pattern does not
 *         compile or regexec() reports an error other than REG_NOMATCH. The error
 *         text is written to stderr.
 */
int myreg( Str input_string, Str pattern, Mimis &result, size_t back_ref_size = 42 );

int myreg( Str input_string, Str pattern, Mimis &result, size_t back_ref_size ) {
	
	regex_t rg;
	
	// cflags is REG_EXTENDED for now; see the regcomp manpage for the other options.
	int ret = regcomp( &rg, pattern.c_str(), REG_EXTENDED );
	
	if( ret ) {
		
		char buf[1024] = "";
		regerror( ret, &rg, buf, sizeof( buf ) );
		
		fprintf( stderr, "regcomp failed. %s\n", buf );
		
		// No regfree() here: after a failed regcomp() the contents of rg are
		// undefined, so there is nothing that may safely be released.
		return( 1 );
		
	}
	
	// Releases the compiled pattern on every exit path below, exceptions included.
	struct RegFreeGuard {
		regex_t *compiled;
		explicit RegFreeGuard( regex_t *p ) : compiled( p ) {}
		~RegFreeGuard() { regfree( compiled ); }
	} guard( &rg );
	
	// Group 0 is the whole match and the pattern itself defines re_nsub more groups,
	// so there is never a reason to look at more than re_nsub + 1 slots.
	const size_t pattern_groups = rg.re_nsub + 1;
	const size_t group_count = back_ref_size < pattern_groups ? back_ref_size : pattern_groups;
	
	if( group_count == 0 ) {
		
		// Without slot 0 there is nothing to record and no way to advance.
		return( 0 );
		
	}
	
	// One buffer reused for every regexec() call (it used to be re-allocated, and
	// leaked, once per match).
	std::vector< regmatch_t > matching( group_count );
	
	const char *subject = input_string.c_str();
	const size_t subject_len = input_string.size();
	size_t offset = 0; // where the next search starts inside input_string
	int match_count = 0;
	
	while( 1 ) {
		
		// When resuming in the middle of the text, tell regexec() that this is not
		// the beginning of a line; otherwise '^' would match at every restart point.
		const int eflags = offset > 0 ? REG_NOTBOL : 0;
		
		ret = regexec( &rg, subject + offset, group_count, &matching[0], eflags );
		
		if( ret == REG_NOMATCH ) {
			
			// This is not an error, there are simply no more matches.
			break;
			
		}
		
		if( ret ) {
			
			char buf[1024] = "";
			regerror( ret, &rg, buf, sizeof( buf ) );
			
			fprintf( stderr, "regexec failed. %s\n", buf );
			
			return( 1 );
			
		}
		
		for( size_t i = 0; i < group_count; ++i ) {
			
			const regoff_t start_index = matching[i].rm_so;
			const regoff_t end_index = matching[i].rm_eo;
			
			if( start_index == -1 || end_index == -1 ) {
				
				// This group did not take part in the match (e.g. the unused side
				// of an alternation). Later groups may still be set, so keep going.
				continue;
				
			}
			
			// Offsets from regexec() are relative to the position the search started at.
			result[match_count][static_cast< int >( i )] = input_string.substr(
				offset + static_cast< size_t >( start_index ),
				static_cast< size_t >( end_index - start_index ) );
			
		}
		
		size_t consumed = static_cast< size_t >( matching[0].rm_eo );
		
		// An empty match consumes nothing; step over one character so the loop
		// always makes progress.
		if( matching[0].rm_so == matching[0].rm_eo ) {
			
			++consumed;
			
		}
		
		offset += consumed;
		
		// ">=" rather than "==": an empty match at the very end steps one past the
		// end of the text, which must stop the scan instead of being dereferenced.
		if( offset >= subject_len ) {
			
			break;
			
		}
		
		++match_count;
		
	}
	
	return( 0 );
	
}

int main( int argc, char **argv ) {
	
	if( argc != 3 ) {
		
		printf( "./myreg input_string pattern\n\n" );
		return( 1 );
		
	}
	
	printf( "input_string:\n%s\n", argv[1] );
	printf( "\n" );
	printf( "pattern:\n%s\n", argv[2] );
	printf( "\n" );
	
	Mimis result;
	
	if( myreg( argv[1], argv[2], result ) ) {
		
		return( 1 );
		
	}
	
	printf( "result:\n" );
	
	for( Mimis::iterator it = result.begin(), eit = result.end(); it != eit; ++it ) {
		
		for( Mis::iterator itt = it->second.begin(), eitt = it->second.end(); itt != eitt; ++itt ) {
			
			printf( "%d %d %s\n", it->first, itt->first, itt->second.c_str() );
			
		}
		
	}
	
	return( 0 );
	
}


It seems there is no quick, ready-made way to use regular expressions in C and C++.

That's why I decided to use the POSIX regex functions (regcomp, regexec, regerror and regfree from regex.h).

The code above is what I came up with.

It retrieves the whole match as well as every back reference (capture group). (Writing all the back-reference handling yourself is tedious, isn't it?)

The execution results are shown below.


[todanano@localhost samba]$ ./myreg 'abc123def' '(abc).*(def)'
input_string:
abc123def

pattern:
(abc).*(def)

result:
0 0 abc123def
0 1 abc
0 2 def
[todanano@localhost samba]$
[todanano@localhost samba]$ ./myreg '192.168.0.1-192.168.0.254' '([[:digit:]]{1,3})\.([[:digit:]]{1,3})\.([[:digit:]]{1,3})\.([[:digit:]]{1,3})'
input_string:
192.168.0.1-192.168.0.254

pattern:
([[:digit:]]{1,3})\.([[:digit:]]{1,3})\.([[:digit:]]{1,3})\.([[:digit:]]{1,3})

result:
0 0 192.168.0.1
0 1 192
0 2 168
0 3 0
0 4 1
1 0 192.168.0.254
1 1 192
1 2 168
1 3 0
1 4 254
[todanano@localhost samba]$
[todanano@localhost samba]$

Recommended Posts

Regular expression in regex.h
Regular expression in Python
Regular expression in Python
Regular expression Greedy
Regular expression re
Start / end match in python regular expression
Regular expression with pymongo
Date notation regular expression
Regular expression look-ahead and look-behind
How to write regular expression patterns in Linux
python regular expression memo
Regular expression matching method
Regular expression symbolic group name in Python / Ruby
Regular expression confirmation quiz!
Use let expression in Python
Python 處 處 regular expression Notes
Use regular expressions in C
Use regular expressions in Python
Julia Quick Note [04] Regular Expression
Regular expression manipulation with Python
Regular expression check tool summary
Reproduce the Python regular expression r'\ w (? U)' in JavaScript
How to get all the possible values in a regular expression
Decompose hostname with co.jp with regular expression
String replacement with Python regular expression
100 language processing knocks 2020: Chapter 3 (regular expression)
Introduction to regular expression processing system
When using regular expressions in Python
Features of regular expression modules that I often use personally in Python
What's in the parameter? Edit String & Expression
Use print in a Python2 lambda expression
(Python) HTML reading and regular expression notes
Replace non-ASCII with regular expressions in Python
Don't use \ d in Python 3 regular expressions!
Search pythondict dictionary key by regular expression
How to use regular expressions in Python