public class XMLFileCommitter extends AbstractFSCommitter<EnhancedXMLStreamWriter>
Commits documents to XML files. There are two kinds of document representations: upserts and deletions.
If you request to split upserts and deletions into separate files, the generated files will start with "upsert-" (for additions/modifications) and "delete-" (for deletions).
The generated files are never updated. Sending a modified document with the same reference will create a new entry and won't modify any existing ones. You can think of the generated files as a set of commit instructions.
The generated XML file names are made of a timestamp and a sequence number.
You can optionally add a prefix or suffix to the names of the generated files (by default, none is added).
<docs>
<!-- Document additions: -->
<upsert>
<reference>(document reference, e.g., URL)</reference>
<metadata>
<meta
name="(meta field name)">
(value)
</meta>
<meta
name="(meta field name)">
(value)
</meta>
<!-- meta is repeated for each metadata field -->
</metadata>
<content>(document content goes here)</content>
</upsert>
<upsert>
<!-- upsert element is repeated for each addition -->
</upsert>
<!-- Document deletions: -->
<delete>
<reference>(document reference, e.g., URL)</reference>
<metadata>
<meta
name="(meta field name)">
(value)
</meta>
<meta
name="(meta field name)">
(value)
</meta>
<!-- meta is repeated for each metadata field -->
</metadata>
</delete>
<delete>
<!-- delete element is repeated for each deletion -->
</delete>
</docs>
<committer
class="com.norconex.committer.core3.fs.impl.XMLFileCommitter">
<directory>(path where to save the files)</directory>
<docsPerFile>(max number of docs per file)</docsPerFile>
<compress>[false|true]</compress>
<splitUpsertDelete>[false|true]</splitUpsertDelete>
<fileNamePrefix>(optional prefix to created file names)</fileNamePrefix>
<fileNameSuffix>(optional suffix to created file names)</fileNameSuffix>
<!-- multiple "restrictTo" tags allowed (only one needs to match) -->
<restrictTo>
<fieldMatcher
method="[basic|csv|wildcard|regex]"
ignoreCase="[false|true]"
ignoreDiacritic="[false|true]"
partial="[false|true]">
(field-matching expression)
</fieldMatcher>
<valueMatcher
method="[basic|csv|wildcard|regex]"
ignoreCase="[false|true]"
ignoreDiacritic="[false|true]"
partial="[false|true]">
(value-matching expression)
</valueMatcher>
</restrictTo>
<fieldMappings>
<!-- Add as many field mappings as needed -->
<mapping
fromField="(source field name)"
toField="(target field name)"/>
</fieldMappings>
<indent>(number of indentation spaces, default does not indent)</indent>
</committer>
Constructor and Description |
---|
XMLFileCommitter() |
Modifier and Type | Method and Description |
---|---|
protected void |
closeDocWriter(EnhancedXMLStreamWriter xml) |
protected EnhancedXMLStreamWriter |
createDocWriter(Writer writer) |
boolean |
equals(Object other) |
protected String |
getFileExtension() |
int |
getIndent() |
int |
hashCode() |
void |
loadFSCommitterFromXML(XML xml) |
void |
saveFSCommitterToXML(XML xml) |
void |
setIndent(int indent) |
String |
toString() |
protected void |
writeDelete(EnhancedXMLStreamWriter xml,
DeleteRequest deleteRequest) |
protected void |
writeUpsert(EnhancedXMLStreamWriter xml,
UpsertRequest upsertRequest) |
doClean, doClose, doDelete, doInit, doUpsert, getDirectory, getDocsPerFile, getFileNamePrefix, getFileNameSuffix, isCompress, isSplitUpsertDelete, loadCommitterFromXML, saveCommitterToXML, setCompress, setDirectory, setDocsPerFile, setFileNamePrefix, setFileNameSuffix, setSplitUpsertDelete
accept, addRestriction, addRestrictions, applyFieldMappings, clean, clearFieldMappings, clearRestrictions, close, delete, fireDebug, fireDebug, fireError, fireError, fireInfo, fireInfo, getCommitterContext, getFieldMappings, getRestrictions, init, loadFromXML, removeFieldMapping, removeRestriction, removeRestriction, saveToXML, setFieldMapping, setFieldMappings, upsert
clone, finalize, getClass, notify, notifyAll, wait, wait, wait
loadFromXML, saveToXML
public int getIndent()
public void setIndent(int indent)
protected String getFileExtension()
getFileExtension
in class AbstractFSCommitter<EnhancedXMLStreamWriter>
protected EnhancedXMLStreamWriter createDocWriter(Writer writer) throws IOException
createDocWriter
in class AbstractFSCommitter<EnhancedXMLStreamWriter>
IOException
protected void writeUpsert(EnhancedXMLStreamWriter xml, UpsertRequest upsertRequest) throws IOException
writeUpsert
in class AbstractFSCommitter<EnhancedXMLStreamWriter>
IOException
protected void writeDelete(EnhancedXMLStreamWriter xml, DeleteRequest deleteRequest) throws IOException
writeDelete
in class AbstractFSCommitter<EnhancedXMLStreamWriter>
IOException
protected void closeDocWriter(EnhancedXMLStreamWriter xml) throws IOException
closeDocWriter
in class AbstractFSCommitter<EnhancedXMLStreamWriter>
IOException
public void loadFSCommitterFromXML(XML xml)
loadFSCommitterFromXML
in class AbstractFSCommitter<EnhancedXMLStreamWriter>
public void saveFSCommitterToXML(XML xml)
saveFSCommitterToXML
in class AbstractFSCommitter<EnhancedXMLStreamWriter>
public boolean equals(Object other)
equals
in class AbstractFSCommitter<EnhancedXMLStreamWriter>
public int hashCode()
hashCode
in class AbstractFSCommitter<EnhancedXMLStreamWriter>
public String toString()
toString
in class AbstractFSCommitter<EnhancedXMLStreamWriter>
Copyright © 2009–2022 Norconex Inc. All rights reserved.