This step serves as a profiler of input characters. It maps each input character to another character (or a string) according to the character group to which the original character belongs.
It is possible to define whether a single character or a sequence of characters belonging to a single group is classified with a single character or string.
<step id="charactergroupsanalyzer" className="cz.adastra.cif.tasks.text.CharacterGroupsAnalyzer">
  <!-- Sample configuration: two input columns are analyzed against a shared set of character groups. -->
  <properties>
    <analyzedColumns>
      <!-- Reads column "name" and writes the result to "filtered_name". -->
      <analyzedColumn src="name" dest="filtered_name">
        <!-- Scoring for this column; explanations go to column "expl". -->
        <scorer explanationColumn="expl">
          <scoringEntries>
            <!-- NOTE(review): key names suggest "character not in any group" and "null input"; confirm against the step reference. -->
            <scoringEntry key="CG_UNKNOWN_CHAR" score="1" explain="true"/>
            <scoringEntry key="CG_NULL_INPUT" score="2" explain="true"/>
          </scoringEntries>
        </scorer>
      </analyzedColumn>
      <!-- Reads column "last_name" and writes the result to "filtered_last_name". -->
      <analyzedColumn src="last_name" dest="filtered_last_name">
        <!-- Same scoring entries as above. NOTE(review): both columns share explanation column "expl"; confirm this is intended. -->
        <scorer explanationColumn="expl">
          <scoringEntries>
            <scoringEntry key="CG_UNKNOWN_CHAR" score="1" explain="true"/>
            <scoringEntry key="CG_NULL_INPUT" score="2" explain="true"/>
          </scoringEntries>
        </scorer>
      </analyzedColumn>
    </analyzedColumns>
    <!-- Each group pairs an output symbol with the set of characters it stands for. -->
    <defaultCharacterGroups>
      <characterGroup symbol="A" characters="[:uppercase:]"/>
      <!-- NOTE(review): presumably lowercase letters except e, f, g, h, plus the literal characters / + = : [ ]; verify the set syntax. -->
      <characterGroup symbol="a" characters="[:lowercase:-efgh:]/+=:[]"/>
      <!-- NOTE(review): presumably the range e through h, i.e. the letters excluded from the group above; verify. -->
      <characterGroup symbol="x" characters="e-h"/>
      <characterGroup symbol="N" characters="0123456789"/>
      <!-- The symbol is a single space. NOTE(review): the trailing "-" presumably adds a literal hyphen to the whitespace set; verify. -->
      <characterGroup symbol=" " characters="[:white:]-"/>
    </defaultCharacterGroups>
    <!-- NOTE(review): name suggests that characters matching no group are not copied to the output when false; confirm. -->
    <copyUnknownCharacters>false</copyUnknownCharacters>
  </properties>
</step>
| iWay Software |