ocrd_models.ocrd_page module

API to PAGE-XML, generated with generateDS from XML schema.

ocrd_models.ocrd_page.parse(inFileName, silence=False)[source]
ocrd_models.ocrd_page.parseString(inString, silence=False)[source]

Parse a string, create the object tree, and export it.

Arguments:

  • inString – A string. This XML fragment should not start with an XML declaration containing an encoding.
  • silence – A boolean. If False, export the object.

Returns – The root object in the tree.

class ocrd_models.ocrd_page.AlternativeImageType(filename=None, comments=None, conf=None, **kwargs_)[source]

Bases: ocrd_models.ocrd_page_generateds.GeneratedsSuper

Confidence value (between 0 and 1)

build(node)[source]
buildAttributes(node, attrs, already_processed)[source]
buildChildren(child_, node, nodeName_, fromsubclass_=False)[source]
export(outfile, level, namespaceprefix_='pc:', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='AlternativeImageType', pretty_print=True)[source]
exportAttributes(outfile, level, already_processed, namespaceprefix_='pc:', name_='AlternativeImageType')[source]
exportChildren(outfile, level, namespaceprefix_='pc:', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='AlternativeImageType', fromsubclass_=False, pretty_print=True)[source]
static factory(*args_, **kwargs_)[source]
get_comments()[source]
get_conf()[source]
get_filename()[source]
hasContent_()[source]
set_comments(comments)[source]
set_conf(conf)[source]
set_filename(filename)[source]
subclass = None
superclass = None
class ocrd_models.ocrd_page.CoordsType(points=None, conf=None, **kwargs_)[source]

Bases: ocrd_models.ocrd_page_generateds.GeneratedsSuper

Polygon outline of the element as a path of points. No points may lie outside the outline of its parent, which in the case of Border is the bounding rectangle of the root image. Paths are closed by convention, i.e. the last point logically connects with the first (and at least 3 points are required to span an area). Paths must be planar (i.e. must not self-intersect). Confidence value (between 0 and 1)

build(node)[source]
buildAttributes(node, attrs, already_processed)[source]
buildChildren(child_, node, nodeName_, fromsubclass_=False)[source]
export(outfile, level, namespaceprefix_='pc:', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='CoordsType', pretty_print=True)[source]
exportAttributes(outfile, level, already_processed, namespaceprefix_='pc:', name_='CoordsType')[source]
exportChildren(outfile, level, namespaceprefix_='pc:', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='CoordsType', fromsubclass_=False, pretty_print=True)[source]
static factory(*args_, **kwargs_)[source]
get_conf()[source]
get_points()[source]
hasContent_()[source]
set_conf(conf)[source]
set_points(points)[source]
subclass = None
superclass = None
class ocrd_models.ocrd_page.GlyphType(id=None, ligature=None, symbol=None, script=None, production=None, custom=None, comments=None, AlternativeImage=None, Coords=None, Graphemes=None, TextEquiv=None, TextStyle=None, UserDefined=None, Labels=None, **kwargs_)[source]

Bases: ocrd_models.ocrd_page_generateds.GeneratedsSuper

The script used for the glyph. Overrides the production attribute of the parent word / text line / text region. For generic use.

add_AlternativeImage(value)[source]
add_Labels(value)[source]
add_TextEquiv(value)[source]
build(node)[source]
buildAttributes(node, attrs, already_processed)[source]
buildChildren(child_, node, nodeName_, fromsubclass_=False)[source]
export(outfile, level, namespaceprefix_='pc:', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='GlyphType', pretty_print=True)[source]
exportAttributes(outfile, level, already_processed, namespaceprefix_='pc:', name_='GlyphType')[source]
exportChildren(outfile, level, namespaceprefix_='pc:', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='GlyphType', fromsubclass_=False, pretty_print=True)[source]
static factory(*args_, **kwargs_)[source]
get_AlternativeImage()[source]
get_Coords()[source]
get_Graphemes()[source]
get_Labels()[source]
get_TextEquiv()[source]
get_TextStyle()[source]
get_UserDefined()[source]
get_comments()[source]
get_custom()[source]
get_id()[source]
get_ligature()[source]
get_production()[source]
get_script()[source]
get_symbol()[source]
hasContent_()[source]
insert_AlternativeImage_at(index, value)[source]
insert_Labels_at(index, value)[source]
insert_TextEquiv_at(index, value)[source]
replace_AlternativeImage_at(index, value)[source]
replace_Labels_at(index, value)[source]
replace_TextEquiv_at(index, value)[source]
set_AlternativeImage(AlternativeImage)[source]
set_Coords(Coords)[source]
set_Graphemes(Graphemes)[source]
set_Labels(Labels)[source]
set_TextEquiv(TextEquiv)[source]
set_TextStyle(TextStyle)[source]
set_UserDefined(UserDefined)[source]
set_comments(comments)[source]
set_custom(custom)[source]
set_id(id)[source]
set_ligature(ligature)[source]
set_production(production)[source]
set_script(script)[source]
set_symbol(symbol)[source]
subclass = None
superclass = None
class ocrd_models.ocrd_page.ImageRegionType(id=None, custom=None, comments=None, continuation=None, AlternativeImage=None, Coords=None, UserDefined=None, Labels=None, Roles=None, TextRegion=None, ImageRegion=None, LineDrawingRegion=None, GraphicRegion=None, TableRegion=None, ChartRegion=None, SeparatorRegion=None, MathsRegion=None, ChemRegion=None, MusicRegion=None, AdvertRegion=None, NoiseRegion=None, UnknownRegion=None, CustomRegion=None, orientation=None, colourDepth=None, bgColour=None, embText=None, **kwargs_)[source]

Bases: ocrd_models.ocrd_page_generateds.RegionType

An image is considered to be more intricate and complex than a graphic. These can be photos or drawings. The angle the rectangle encapsulating a region has to be rotated in clockwise direction in order to correct the present skew (negative values indicate anti-clockwise rotation). Range: -179.999,180. The colour bit depth required for the region. The background colour of the region. Specifies whether the region also contains text.

build(node)[source]
buildAttributes(node, attrs, already_processed)[source]
buildChildren(child_, node, nodeName_, fromsubclass_=False)[source]
export(outfile, level, namespaceprefix_='pc:', namespacedef_='', name_='ImageRegionType', pretty_print=True)[source]
exportAttributes(outfile, level, already_processed, namespaceprefix_='pc:', name_='ImageRegionType')[source]
exportChildren(outfile, level, namespaceprefix_='pc:', namespacedef_='', name_='ImageRegionType', fromsubclass_=False, pretty_print=True)[source]
static factory(*args_, **kwargs_)[source]
get_bgColour()[source]
get_colourDepth()[source]
get_embText()[source]
get_orientation()[source]
hasContent_()[source]
set_bgColour(bgColour)[source]
set_colourDepth(colourDepth)[source]
set_embText(embText)[source]
set_orientation(orientation)[source]
subclass = None
superclass

alias of RegionType

class ocrd_models.ocrd_page.LabelType(value=None, type_=None, comments=None, **kwargs_)[source]

Bases: ocrd_models.ocrd_page_generateds.GeneratedsSuper

Semantic label. The label / tag (e.g. ‘person’). Can be an RDF resource identifier (e.g. object of an RDF triple). Additional information on the label (e.g. ‘YYYY-mm-dd’ for a date label). Can be used as predicate of an RDF triple.

build(node)[source]
buildAttributes(node, attrs, already_processed)[source]
buildChildren(child_, node, nodeName_, fromsubclass_=False)[source]
export(outfile, level, namespaceprefix_='pc:', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='LabelType', pretty_print=True)[source]
exportAttributes(outfile, level, already_processed, namespaceprefix_='pc:', name_='LabelType')[source]
exportChildren(outfile, level, namespaceprefix_='pc:', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='LabelType', fromsubclass_=False, pretty_print=True)[source]
static factory(*args_, **kwargs_)[source]
get_comments()[source]
get_type()[source]
get_value()[source]
hasContent_()[source]
set_comments(comments)[source]
set_type(type_)[source]
set_value(value)[source]
subclass = None
superclass = None
class ocrd_models.ocrd_page.LabelsType(externalModel=None, externalId=None, prefix=None, comments=None, Label=None, **kwargs_)[source]

Bases: ocrd_models.ocrd_page_generateds.GeneratedsSuper

Reference to external model / ontology / schema. E.g. an RDF resource identifier (to be used as subject or object of an RDF triple). Prefix for all labels (e.g. first part of a URI).

add_Label(value)[source]
build(node)[source]
buildAttributes(node, attrs, already_processed)[source]
buildChildren(child_, node, nodeName_, fromsubclass_=False)[source]
export(outfile, level, namespaceprefix_='pc:', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='LabelsType', pretty_print=True)[source]
exportAttributes(outfile, level, already_processed, namespaceprefix_='pc:', name_='LabelsType')[source]
exportChildren(outfile, level, namespaceprefix_='pc:', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='LabelsType', fromsubclass_=False, pretty_print=True)[source]
static factory(*args_, **kwargs_)[source]
get_Label()[source]
get_comments()[source]
get_externalId()[source]
get_externalModel()[source]
get_prefix()[source]
hasContent_()[source]
insert_Label_at(index, value)[source]
replace_Label_at(index, value)[source]
set_Label(Label)[source]
set_comments(comments)[source]
set_externalId(externalId)[source]
set_externalModel(externalModel)[source]
set_prefix(prefix)[source]
subclass = None
superclass = None
class ocrd_models.ocrd_page.MathsRegionType(id=None, custom=None, comments=None, continuation=None, AlternativeImage=None, Coords=None, UserDefined=None, Labels=None, Roles=None, TextRegion=None, ImageRegion=None, LineDrawingRegion=None, GraphicRegion=None, TableRegion=None, ChartRegion=None, SeparatorRegion=None, MathsRegion=None, ChemRegion=None, MusicRegion=None, AdvertRegion=None, NoiseRegion=None, UnknownRegion=None, CustomRegion=None, orientation=None, bgColour=None, **kwargs_)[source]

Bases: ocrd_models.ocrd_page_generateds.RegionType

Regions containing equations and mathematical symbols should be marked as maths regions. The angle the rectangle encapsulating a region has to be rotated in clockwise direction in order to correct the present skew (negative values indicate anti-clockwise rotation). Range: -179.999,180. The background colour of the region.

build(node)[source]
buildAttributes(node, attrs, already_processed)[source]
buildChildren(child_, node, nodeName_, fromsubclass_=False)[source]
export(outfile, level, namespaceprefix_='pc:', namespacedef_='', name_='MathsRegionType', pretty_print=True)[source]
exportAttributes(outfile, level, already_processed, namespaceprefix_='pc:', name_='MathsRegionType')[source]
exportChildren(outfile, level, namespaceprefix_='pc:', namespacedef_='', name_='MathsRegionType', fromsubclass_=False, pretty_print=True)[source]
static factory(*args_, **kwargs_)[source]
get_bgColour()[source]
get_orientation()[source]
hasContent_()[source]
set_bgColour(bgColour)[source]
set_orientation(orientation)[source]
subclass = None
superclass

alias of RegionType

class ocrd_models.ocrd_page.MetadataType(externalRef=None, Creator=None, Created=None, LastChange=None, Comments=None, UserDefined=None, MetadataItem=None, **kwargs_)[source]

Bases: ocrd_models.ocrd_page_generateds.GeneratedsSuper

External reference of any kind

add_MetadataItem(value)[source]
build(node)[source]
buildAttributes(node, attrs, already_processed)[source]
buildChildren(child_, node, nodeName_, fromsubclass_=False)[source]
export(outfile, level, namespaceprefix_='pc:', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='MetadataType', pretty_print=True)[source]
exportAttributes(outfile, level, already_processed, namespaceprefix_='pc:', name_='MetadataType')[source]
exportChildren(outfile, level, namespaceprefix_='pc:', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='MetadataType', fromsubclass_=False, pretty_print=True)[source]
static factory(*args_, **kwargs_)[source]
get_Comments()[source]
get_Created()[source]
get_Creator()[source]
get_LastChange()[source]
get_MetadataItem()[source]
get_UserDefined()[source]
get_externalRef()[source]
hasContent_()[source]
insert_MetadataItem_at(index, value)[source]
replace_MetadataItem_at(index, value)[source]
set_Comments(Comments)[source]
set_Created(Created)[source]
set_Creator(Creator)[source]
set_LastChange(LastChange)[source]
set_MetadataItem(MetadataItem)[source]
set_UserDefined(UserDefined)[source]
set_externalRef(externalRef)[source]
subclass = None
superclass = None
class ocrd_models.ocrd_page.MetadataItemType(type_=None, name=None, value=None, date=None, Labels=None, **kwargs_)[source]

Bases: ocrd_models.ocrd_page_generateds.GeneratedsSuper

Type of metadata (e.g. author). E.g. imagePhotometricInterpretation. E.g. RGB.

add_Labels(value)[source]
build(node)[source]
buildAttributes(node, attrs, already_processed)[source]
buildChildren(child_, node, nodeName_, fromsubclass_=False)[source]
export(outfile, level, namespaceprefix_='pc:', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='MetadataItemType', pretty_print=True)[source]
exportAttributes(outfile, level, already_processed, namespaceprefix_='pc:', name_='MetadataItemType')[source]
exportChildren(outfile, level, namespaceprefix_='pc:', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='MetadataItemType', fromsubclass_=False, pretty_print=True)[source]
static factory(*args_, **kwargs_)[source]
get_Labels()[source]
get_date()[source]
get_name()[source]
get_type()[source]
get_value()[source]
hasContent_()[source]
insert_Labels_at(index, value)[source]
replace_Labels_at(index, value)[source]
set_Labels(Labels)[source]
set_date(date)[source]
set_name(name)[source]
set_type(type_)[source]
set_value(value)[source]
subclass = None
superclass = None
class ocrd_models.ocrd_page.NoiseRegionType(id=None, custom=None, comments=None, continuation=None, AlternativeImage=None, Coords=None, UserDefined=None, Labels=None, Roles=None, TextRegion=None, ImageRegion=None, LineDrawingRegion=None, GraphicRegion=None, TableRegion=None, ChartRegion=None, SeparatorRegion=None, MathsRegion=None, ChemRegion=None, MusicRegion=None, AdvertRegion=None, NoiseRegion=None, UnknownRegion=None, CustomRegion=None, **kwargs_)[source]

Bases: ocrd_models.ocrd_page_generateds.RegionType

Noise regions are regions where no real data lies, only false data created by artifacts on the document or scanner noise.

build(node)[source]
buildAttributes(node, attrs, already_processed)[source]
buildChildren(child_, node, nodeName_, fromsubclass_=False)[source]
export(outfile, level, namespaceprefix_='pc:', namespacedef_='', name_='NoiseRegionType', pretty_print=True)[source]
exportAttributes(outfile, level, already_processed, namespaceprefix_='pc:', name_='NoiseRegionType')[source]
exportChildren(outfile, level, namespaceprefix_='pc:', namespacedef_='', name_='NoiseRegionType', fromsubclass_=False, pretty_print=True)[source]
static factory(*args_, **kwargs_)[source]
hasContent_()[source]
subclass = None
superclass

alias of RegionType

class ocrd_models.ocrd_page.OrderedGroupType(id=None, regionRef=None, caption=None, type_=None, continuation=None, custom=None, comments=None, UserDefined=None, Labels=None, RegionRefIndexed=None, OrderedGroupIndexed=None, UnorderedGroupIndexed=None, **kwargs_)[source]

Bases: ocrd_models.ocrd_page_generateds.GeneratedsSuper

Numbered group (contains ordered elements). Optional link to a parent region of nested regions. The parent region doubles as reading order group. Only the nested regions should be allowed as group members. Is this group a continuation of another group (from previous column or page, for example)? For generic use.

add_Labels(value)[source]
add_OrderedGroupIndexed(value)[source]
add_RegionRefIndexed(value)[source]
add_UnorderedGroupIndexed(value)[source]
build(node)[source]
buildAttributes(node, attrs, already_processed)[source]
buildChildren(child_, node, nodeName_, fromsubclass_=False)[source]
export(outfile, level, namespaceprefix_='pc:', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='OrderedGroupType', pretty_print=True)[source]
exportAttributes(outfile, level, already_processed, namespaceprefix_='pc:', name_='OrderedGroupType')[source]
exportChildren(outfile, level, namespaceprefix_='pc:', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='OrderedGroupType', fromsubclass_=False, pretty_print=True)[source]
static factory(*args_, **kwargs_)[source]
get_Labels()[source]
get_OrderedGroupIndexed()[source]
get_RegionRefIndexed()[source]
get_UnorderedGroupIndexed()[source]
get_UserDefined()[source]
get_caption()[source]
get_comments()[source]
get_continuation()[source]
get_custom()[source]
get_id()[source]
get_regionRef()[source]
get_type()[source]
hasContent_()[source]
insert_Labels_at(index, value)[source]
insert_OrderedGroupIndexed_at(index, value)[source]
insert_RegionRefIndexed_at(index, value)[source]
insert_UnorderedGroupIndexed_at(index, value)[source]
replace_Labels_at(index, value)[source]
replace_OrderedGroupIndexed_at(index, value)[source]
replace_RegionRefIndexed_at(index, value)[source]
replace_UnorderedGroupIndexed_at(index, value)[source]
set_Labels(Labels)[source]
set_OrderedGroupIndexed(OrderedGroupIndexed)[source]
set_RegionRefIndexed(RegionRefIndexed)[source]
set_UnorderedGroupIndexed(UnorderedGroupIndexed)[source]
set_UserDefined(UserDefined)[source]
set_caption(caption)[source]
set_comments(comments)[source]
set_continuation(continuation)[source]
set_custom(custom)[source]
set_id(id)[source]
set_regionRef(regionRef)[source]
set_type(type_)[source]
subclass = None
superclass = None
class ocrd_models.ocrd_page.PageType(imageFilename=None, imageWidth=None, imageHeight=None, imageXResolution=None, imageYResolution=None, imageResolutionUnit=None, custom=None, orientation=None, type_=None, primaryLanguage=None, secondaryLanguage=None, primaryScript=None, secondaryScript=None, readingDirection=None, textLineOrder=None, conf=None, AlternativeImage=None, Border=None, PrintSpace=None, ReadingOrder=None, Layers=None, Relations=None, TextStyle=None, UserDefined=None, Labels=None, TextRegion=None, ImageRegion=None, LineDrawingRegion=None, GraphicRegion=None, TableRegion=None, ChartRegion=None, MapRegion=None, SeparatorRegion=None, MathsRegion=None, ChemRegion=None, MusicRegion=None, AdvertRegion=None, NoiseRegion=None, UnknownRegion=None, CustomRegion=None, **kwargs_)[source]

Bases: ocrd_models.ocrd_page_generateds.GeneratedsSuper

Contains the image file name including the file extension. Specifies the width of the image. Specifies the height of the image. Specifies the image resolution in width. Specifies the image resolution in height. Specifies the unit of the resolution information referring to a standardised unit of measurement (pixels per inch, pixels per centimeter or other). For generic use. The angle the rectangle encapsulating the page (or its Border) has to be rotated in clockwise direction in order to correct the present skew (negative values indicate anti-clockwise rotation). (The rotated image can be further referenced via “AlternativeImage”.) Range: -179.999,180. The type of the page within the document (e.g. cover page). The primary language used in the page (lower-level definitions override the page-level definition). The secondary language used in the page (lower-level definitions override the page-level definition). The primary script used in the page (lower-level definitions override the page-level definition). The secondary script used in the page (lower-level definitions override the page-level definition). The direction in which text within lines should be read (order of words and characters), in addition to “textLineOrder” (lower-level definitions override the page-level definition). The order of text lines within a block, in addition to “readingDirection” (lower-level definitions override the page-level definition). Confidence value for whole page (between 0 and 1).

add_AdvertRegion(value)[source]
add_AlternativeImage(value)[source]
add_ChartRegion(value)[source]
add_ChemRegion(value)[source]
add_CustomRegion(value)[source]
add_GraphicRegion(value)[source]
add_ImageRegion(value)[source]
add_Labels(value)[source]
add_LineDrawingRegion(value)[source]
add_MapRegion(value)[source]
add_MathsRegion(value)[source]
add_MusicRegion(value)[source]
add_NoiseRegion(value)[source]
add_SeparatorRegion(value)[source]
add_TableRegion(value)[source]
add_TextRegion(value)[source]
add_UnknownRegion(value)[source]
build(node)[source]
buildAttributes(node, attrs, already_processed)[source]
buildChildren(child_, node, nodeName_, fromsubclass_=False)[source]
export(outfile, level, namespaceprefix_='pc:', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='PageType', pretty_print=True)[source]
exportAttributes(outfile, level, already_processed, namespaceprefix_='pc:', name_='PageType')[source]
exportChildren(outfile, level, namespaceprefix_='pc:', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='PageType', fromsubclass_=False, pretty_print=True)[source]
static factory(*args_, **kwargs_)[source]
get_AdvertRegion()[source]
get_AlternativeImage()[source]
get_Border()[source]
get_ChartRegion()[source]
get_ChemRegion()[source]
get_CustomRegion()[source]
get_GraphicRegion()[source]
get_ImageRegion()[source]
get_Labels()[source]
get_Layers()[source]
get_LineDrawingRegion()[source]
get_MapRegion()[source]
get_MathsRegion()[source]
get_MusicRegion()[source]
get_NoiseRegion()[source]
get_PrintSpace()[source]
get_ReadingOrder()[source]
get_Relations()[source]
get_SeparatorRegion()[source]
get_TableRegion()[source]
get_TextRegion()[source]
get_TextStyle()[source]
get_UnknownRegion()[source]
get_UserDefined()[source]
get_conf()[source]
get_custom()[source]
get_imageFilename()[source]
get_imageHeight()[source]
get_imageResolutionUnit()[source]
get_imageWidth()[source]
get_imageXResolution()[source]
get_imageYResolution()[source]
get_orientation()[source]
get_primaryLanguage()[source]
get_primaryScript()[source]
get_readingDirection()[source]
get_secondaryLanguage()[source]
get_secondaryScript()[source]
get_textLineOrder()[source]
get_type()[source]
hasContent_()[source]
insert_AdvertRegion_at(index, value)[source]
insert_AlternativeImage_at(index, value)[source]
insert_ChartRegion_at(index, value)[source]
insert_ChemRegion_at(index, value)[source]
insert_CustomRegion_at(index, value)[source]
insert_GraphicRegion_at(index, value)[source]
insert_ImageRegion_at(index, value)[source]
insert_Labels_at(index, value)[source]
insert_LineDrawingRegion_at(index, value)[source]
insert_MapRegion_at(index, value)[source]
insert_MathsRegion_at(index, value)[source]
insert_MusicRegion_at(index, value)[source]
insert_NoiseRegion_at(index, value)[source]
insert_SeparatorRegion_at(index, value)[source]
insert_TableRegion_at(index, value)[source]
insert_TextRegion_at(index, value)[source]
insert_UnknownRegion_at(index, value)[source]
replace_AdvertRegion_at(index, value)[source]
replace_AlternativeImage_at(index, value)[source]
replace_ChartRegion_at(index, value)[source]
replace_ChemRegion_at(index, value)[source]
replace_CustomRegion_at(index, value)[source]
replace_GraphicRegion_at(index, value)[source]
replace_ImageRegion_at(index, value)[source]
replace_Labels_at(index, value)[source]
replace_LineDrawingRegion_at(index, value)[source]
replace_MapRegion_at(index, value)[source]
replace_MathsRegion_at(index, value)[source]
replace_MusicRegion_at(index, value)[source]
replace_NoiseRegion_at(index, value)[source]
replace_SeparatorRegion_at(index, value)[source]
replace_TableRegion_at(index, value)[source]
replace_TextRegion_at(index, value)[source]
replace_UnknownRegion_at(index, value)[source]
set_AdvertRegion(AdvertRegion)[source]
set_AlternativeImage(AlternativeImage)[source]
set_Border(Border)[source]
set_ChartRegion(ChartRegion)[source]
set_ChemRegion(ChemRegion)[source]
set_CustomRegion(CustomRegion)[source]
set_GraphicRegion(GraphicRegion)[source]
set_ImageRegion(ImageRegion)[source]
set_Labels(Labels)[source]
set_Layers(Layers)[source]
set_LineDrawingRegion(LineDrawingRegion)[source]
set_MapRegion(MapRegion)[source]
set_MathsRegion(MathsRegion)[source]
set_MusicRegion(MusicRegion)[source]
set_NoiseRegion(NoiseRegion)[source]
set_PrintSpace(PrintSpace)[source]
set_ReadingOrder(ReadingOrder)[source]
set_Relations(Relations)[source]
set_SeparatorRegion(SeparatorRegion)[source]
set_TableRegion(TableRegion)[source]
set_TextRegion(TextRegion)[source]
set_TextStyle(TextStyle)[source]
set_UnknownRegion(UnknownRegion)[source]
set_UserDefined(UserDefined)[source]
set_conf(conf)[source]
set_custom(custom)[source]
set_imageFilename(imageFilename)[source]
set_imageHeight(imageHeight)[source]
set_imageResolutionUnit(imageResolutionUnit)[source]
set_imageWidth(imageWidth)[source]
set_imageXResolution(imageXResolution)[source]
set_imageYResolution(imageYResolution)[source]
set_orientation(orientation)[source]
set_primaryLanguage(primaryLanguage)[source]
set_primaryScript(primaryScript)[source]
set_readingDirection(readingDirection)[source]
set_secondaryLanguage(secondaryLanguage)[source]
set_secondaryScript(secondaryScript)[source]
set_textLineOrder(textLineOrder)[source]
set_type(type_)[source]
subclass = None
superclass = None
class ocrd_models.ocrd_page.PcGtsType(pcGtsId=None, Metadata=None, Page=None, **kwargs_)[source]

Bases: ocrd_models.ocrd_page_generateds.GeneratedsSuper

build(node)[source]
buildAttributes(node, attrs, already_processed)[source]
buildChildren(child_, node, nodeName_, fromsubclass_=False)[source]
export(outfile, level, namespaceprefix_='pc:', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='PcGtsType', pretty_print=True)[source]
exportAttributes(outfile, level, already_processed, namespaceprefix_='pc:', name_='PcGtsType')[source]
exportChildren(outfile, level, namespaceprefix_='pc:', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='PcGtsType', fromsubclass_=False, pretty_print=True)[source]
static factory(*args_, **kwargs_)[source]
get_Metadata()[source]
get_Page()[source]
get_pcGtsId()[source]
hasContent_()[source]
set_Metadata(Metadata)[source]
set_Page(Page)[source]
set_pcGtsId(pcGtsId)[source]
subclass = None
superclass = None
class ocrd_models.ocrd_page.ReadingOrderType(conf=None, OrderedGroup=None, UnorderedGroup=None, **kwargs_)[source]

Bases: ocrd_models.ocrd_page_generateds.GeneratedsSuper

Definition of the reading order within the page. To express a reading order between elements they have to be included in an OrderedGroup. Groups may contain further groups. Confidence value (between 0 and 1)

build(node)[source]
buildAttributes(node, attrs, already_processed)[source]
buildChildren(child_, node, nodeName_, fromsubclass_=False)[source]
export(outfile, level, namespaceprefix_='pc:', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='ReadingOrderType', pretty_print=True)[source]
exportAttributes(outfile, level, already_processed, namespaceprefix_='pc:', name_='ReadingOrderType')[source]
exportChildren(outfile, level, namespaceprefix_='pc:', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='ReadingOrderType', fromsubclass_=False, pretty_print=True)[source]
static factory(*args_, **kwargs_)[source]
get_OrderedGroup()[source]
get_UnorderedGroup()[source]
get_conf()[source]
hasContent_()[source]
set_OrderedGroup(OrderedGroup)[source]
set_UnorderedGroup(UnorderedGroup)[source]
set_conf(conf)[source]
subclass = None
superclass = None
class ocrd_models.ocrd_page.RegionRefIndexedType(index=None, regionRef=None, **kwargs_)[source]

Bases: ocrd_models.ocrd_page_generateds.GeneratedsSuper

Numbered region. Position (order number) of this item within the current hierarchy level.

build(node)[source]
buildAttributes(node, attrs, already_processed)[source]
buildChildren(child_, node, nodeName_, fromsubclass_=False)[source]
export(outfile, level, namespaceprefix_='pc:', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='RegionRefIndexedType', pretty_print=True)[source]
exportAttributes(outfile, level, already_processed, namespaceprefix_='pc:', name_='RegionRefIndexedType')[source]
exportChildren(outfile, level, namespaceprefix_='pc:', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='RegionRefIndexedType', fromsubclass_=False, pretty_print=True)[source]
static factory(*args_, **kwargs_)[source]
get_index()[source]
get_regionRef()[source]
hasContent_()[source]
set_index(index)[source]
set_regionRef(regionRef)[source]
subclass = None
superclass = None
class ocrd_models.ocrd_page.SeparatorRegionType(id=None, custom=None, comments=None, continuation=None, AlternativeImage=None, Coords=None, UserDefined=None, Labels=None, Roles=None, TextRegion=None, ImageRegion=None, LineDrawingRegion=None, GraphicRegion=None, TableRegion=None, ChartRegion=None, SeparatorRegion=None, MathsRegion=None, ChemRegion=None, MusicRegion=None, AdvertRegion=None, NoiseRegion=None, UnknownRegion=None, CustomRegion=None, orientation=None, colour=None, **kwargs_)[source]

Bases: ocrd_models.ocrd_page_generateds.RegionType

Separators are lines that lie between columns and paragraphs and can be used to logically separate different articles from each other. The angle the rectangle encapsulating a region has to be rotated in clockwise direction in order to correct the present skew (negative values indicate anti-clockwise rotation). Range: -179.999,180. The colour of the separator.

build(node)[source]
buildAttributes(node, attrs, already_processed)[source]
buildChildren(child_, node, nodeName_, fromsubclass_=False)[source]
export(outfile, level, namespaceprefix_='pc:', namespacedef_='', name_='SeparatorRegionType', pretty_print=True)[source]
exportAttributes(outfile, level, already_processed, namespaceprefix_='pc:', name_='SeparatorRegionType')[source]
exportChildren(outfile, level, namespaceprefix_='pc:', namespacedef_='', name_='SeparatorRegionType', fromsubclass_=False, pretty_print=True)[source]
static factory(*args_, **kwargs_)[source]
get_colour()[source]
get_orientation()[source]
hasContent_()[source]
set_colour(colour)[source]
set_orientation(orientation)[source]
subclass = None
superclass

alias of RegionType

class ocrd_models.ocrd_page.TextEquivType(index=None, conf=None, dataType=None, dataTypeDetails=None, comments=None, PlainText=None, Unicode=None, **kwargs_)[source]

Bases: ocrd_models.ocrd_page_generateds.GeneratedsSuper

Used for sort order in case multiple TextEquivs are defined. The text content with the lowest index should be interpreted as the main text content. OCR confidence value (between 0 and 1). Type of text content (is it free text or a number, for instance). This is only a descriptive attribute, the text type is not checked during XML validation. Refinement for dataType attribute. Can be a regular expression, for instance.

build(node)[source]
buildAttributes(node, attrs, already_processed)[source]
buildChildren(child_, node, nodeName_, fromsubclass_=False)[source]
export(outfile, level, namespaceprefix_='pc:', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='TextEquivType', pretty_print=True)[source]
exportAttributes(outfile, level, already_processed, namespaceprefix_='pc:', name_='TextEquivType')[source]
exportChildren(outfile, level, namespaceprefix_='pc:', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='TextEquivType', fromsubclass_=False, pretty_print=True)[source]
static factory(*args_, **kwargs_)[source]
get_PlainText()[source]
get_Unicode()[source]
get_comments()[source]
get_conf()[source]
get_dataType()[source]
get_dataTypeDetails()[source]
get_index()[source]
hasContent_()[source]
set_PlainText(PlainText)[source]
set_Unicode(Unicode)[source]
set_comments(comments)[source]
set_conf(conf)[source]
set_dataType(dataType)[source]
set_dataTypeDetails(dataTypeDetails)[source]
set_index(index)[source]
subclass = None
superclass = None
class ocrd_models.ocrd_page.TextLineType(id=None, primaryLanguage=None, primaryScript=None, secondaryScript=None, readingDirection=None, production=None, custom=None, comments=None, index=None, AlternativeImage=None, Coords=None, Baseline=None, Word=None, TextEquiv=None, TextStyle=None, UserDefined=None, Labels=None, **kwargs_)[source]

Bases: ocrd_models.ocrd_page_generateds.GeneratedsSuper

Overrides primaryLanguage attribute of parent text region. The primary script used in the text line. The secondary script used in the text line. The direction in which text within the line should be read (order of words and characters). Overrides the production attribute of the parent text region. For generic use. Position (order number) of this text line within the parent text region.

add_AlternativeImage(value)[source]
add_Labels(value)[source]
add_TextEquiv(value)[source]
add_Word(value)[source]
build(node)[source]
buildAttributes(node, attrs, already_processed)[source]
buildChildren(child_, node, nodeName_, fromsubclass_=False)[source]
export(outfile, level, namespaceprefix_='pc:', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='TextLineType', pretty_print=True)[source]
exportAttributes(outfile, level, already_processed, namespaceprefix_='pc:', name_='TextLineType')[source]
exportChildren(outfile, level, namespaceprefix_='pc:', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='TextLineType', fromsubclass_=False, pretty_print=True)[source]
static factory(*args_, **kwargs_)[source]
get_AlternativeImage()[source]
get_Baseline()[source]
get_Coords()[source]
get_Labels()[source]
get_TextEquiv()[source]
get_TextStyle()[source]
get_UserDefined()[source]
get_Word()[source]
get_comments()[source]
get_custom()[source]
get_id()[source]
get_index()[source]
get_primaryLanguage()[source]
get_primaryScript()[source]
get_production()[source]
get_readingDirection()[source]
get_secondaryScript()[source]
hasContent_()[source]
insert_AlternativeImage_at(index, value)[source]
insert_Labels_at(index, value)[source]
insert_TextEquiv_at(index, value)[source]
insert_Word_at(index, value)[source]
replace_AlternativeImage_at(index, value)[source]
replace_Labels_at(index, value)[source]
replace_TextEquiv_at(index, value)[source]
replace_Word_at(index, value)[source]
set_AlternativeImage(AlternativeImage)[source]
set_Baseline(Baseline)[source]
set_Coords(Coords)[source]
set_Labels(Labels)[source]
set_TextEquiv(TextEquiv)[source]
set_TextStyle(TextStyle)[source]
set_UserDefined(UserDefined)[source]
set_Word(Word)[source]
set_comments(comments)[source]
set_custom(custom)[source]
set_id(id)[source]
set_index(index)[source]
set_primaryLanguage(primaryLanguage)[source]
set_primaryScript(primaryScript)[source]
set_production(production)[source]
set_readingDirection(readingDirection)[source]
set_secondaryScript(secondaryScript)[source]
subclass = None
superclass = None
class ocrd_models.ocrd_page.TextStyleType(fontFamily=None, serif=None, monospace=None, fontSize=None, xHeight=None, kerning=None, textColour=None, textColourRgb=None, bgColour=None, bgColourRgb=None, reverseVideo=None, bold=None, italic=None, underlined=None, underlineStyle=None, doubleUnderlined=None, subscript=None, superscript=None, strikethrough=None, smallCaps=None, letterSpaced=None, **kwargs_)[source]

Bases: ocrd_models.ocrd_page_generateds.GeneratedsSuper

Monospace (fixed-pitch, non-proportional) or proportional font. Font family, for instance: Arial, Times New Roman. Add more information if necessary (e.g. blackletter, antiqua). Serif or sans-serif typeface. The size of the characters in points. The x-height or corpus size refers to the distance between the baseline and the mean line of lower-case letters in a typeface. The unit is assumed to be pixels. The degree of space (in points) between the characters in a string of text. Text colour in RGB encoded format (red value) + (256 x green value) + (65536 x blue value). Background colour. Background colour in RGB encoded format (red value) + (256 x green value) + (65536 x blue value). Specifies whether the colour of the text appears reversed against a background colour. Line style details if “underlined” is TRUE.

build(node)[source]
buildAttributes(node, attrs, already_processed)[source]
buildChildren(child_, node, nodeName_, fromsubclass_=False)[source]
export(outfile, level, namespaceprefix_='pc:', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='TextStyleType', pretty_print=True)[source]
exportAttributes(outfile, level, already_processed, namespaceprefix_='pc:', name_='TextStyleType')[source]
exportChildren(outfile, level, namespaceprefix_='pc:', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='TextStyleType', fromsubclass_=False, pretty_print=True)[source]
static factory(*args_, **kwargs_)[source]
get_bgColour()[source]
get_bgColourRgb()[source]
get_bold()[source]
get_doubleUnderlined()[source]
get_fontFamily()[source]
get_fontSize()[source]
get_italic()[source]
get_kerning()[source]
get_letterSpaced()[source]
get_monospace()[source]
get_reverseVideo()[source]
get_serif()[source]
get_smallCaps()[source]
get_strikethrough()[source]
get_subscript()[source]
get_superscript()[source]
get_textColour()[source]
get_textColourRgb()[source]
get_underlineStyle()[source]
get_underlined()[source]
get_xHeight()[source]
hasContent_()[source]
set_bgColour(bgColour)[source]
set_bgColourRgb(bgColourRgb)[source]
set_bold(bold)[source]
set_doubleUnderlined(doubleUnderlined)[source]
set_fontFamily(fontFamily)[source]
set_fontSize(fontSize)[source]
set_italic(italic)[source]
set_kerning(kerning)[source]
set_letterSpaced(letterSpaced)[source]
set_monospace(monospace)[source]
set_reverseVideo(reverseVideo)[source]
set_serif(serif)[source]
set_smallCaps(smallCaps)[source]
set_strikethrough(strikethrough)[source]
set_subscript(subscript)[source]
set_superscript(superscript)[source]
set_textColour(textColour)[source]
set_textColourRgb(textColourRgb)[source]
set_underlineStyle(underlineStyle)[source]
set_underlined(underlined)[source]
set_xHeight(xHeight)[source]
subclass = None
superclass = None
class ocrd_models.ocrd_page.TextRegionType(id=None, custom=None, comments=None, continuation=None, AlternativeImage=None, Coords=None, UserDefined=None, Labels=None, Roles=None, TextRegion=None, ImageRegion=None, LineDrawingRegion=None, GraphicRegion=None, TableRegion=None, ChartRegion=None, SeparatorRegion=None, MathsRegion=None, ChemRegion=None, MusicRegion=None, AdvertRegion=None, NoiseRegion=None, UnknownRegion=None, CustomRegion=None, orientation=None, type_=None, leading=None, readingDirection=None, textLineOrder=None, readingOrientation=None, indented=None, align=None, primaryLanguage=None, secondaryLanguage=None, primaryScript=None, secondaryScript=None, production=None, TextLine=None, TextEquiv=None, TextStyle=None, **kwargs_)[source]

Bases: ocrd_models.ocrd_page_generateds.RegionType

Pure text is represented as a text region. This includes drop capitals, but practically ornate text may be considered as a graphic. The angle the rectangle encapsulating the region has to be rotated in clockwise direction in order to correct the present skew (negative values indicate anti-clockwise rotation). (The rotated image can be further referenced via “AlternativeImage”.) Range: -179.999,180. The nature of the text in the region. The degree of space in points between the lines of text (line spacing). The direction in which text within lines should be read (order of words and characters), in addition to “textLineOrder”. The order of text lines within the block, in addition to “readingDirection”. The angle the baseline of text within the region has to be rotated (relative to the rectangle encapsulating the region) in clockwise direction in order to correct the present skew, in addition to “orientation” (negative values indicate anti-clockwise rotation). Range: -179.999,180. Defines whether a region of text is indented or not. Text align. The primary language used in the region. The secondary language used in the region. The primary script used in the region. The secondary script used in the region.

add_TextEquiv(value)[source]
add_TextLine(value)[source]
build(node)[source]
buildAttributes(node, attrs, already_processed)[source]
buildChildren(child_, node, nodeName_, fromsubclass_=False)[source]
export(outfile, level, namespaceprefix_='pc:', namespacedef_='', name_='TextRegionType', pretty_print=True)[source]
exportAttributes(outfile, level, already_processed, namespaceprefix_='pc:', name_='TextRegionType')[source]
exportChildren(outfile, level, namespaceprefix_='pc:', namespacedef_='', name_='TextRegionType', fromsubclass_=False, pretty_print=True)[source]
static factory(*args_, **kwargs_)[source]
get_TextEquiv()[source]
get_TextLine()[source]
get_TextStyle()[source]
get_align()[source]
get_indented()[source]
get_leading()[source]
get_orientation()[source]
get_primaryLanguage()[source]
get_primaryScript()[source]
get_production()[source]
get_readingDirection()[source]
get_readingOrientation()[source]
get_secondaryLanguage()[source]
get_secondaryScript()[source]
get_textLineOrder()[source]
get_type()[source]
hasContent_()[source]
insert_TextEquiv_at(index, value)[source]
insert_TextLine_at(index, value)[source]
replace_TextEquiv_at(index, value)[source]
replace_TextLine_at(index, value)[source]
set_TextEquiv(TextEquiv)[source]
set_TextLine(TextLine)[source]
set_TextStyle(TextStyle)[source]
set_align(align)[source]
set_indented(indented)[source]
set_leading(leading)[source]
set_orientation(orientation)[source]
set_primaryLanguage(primaryLanguage)[source]
set_primaryScript(primaryScript)[source]
set_production(production)[source]
set_readingDirection(readingDirection)[source]
set_readingOrientation(readingOrientation)[source]
set_secondaryLanguage(secondaryLanguage)[source]
set_secondaryScript(secondaryScript)[source]
set_textLineOrder(textLineOrder)[source]
set_type(type_)[source]
subclass = None
superclass

alias of RegionType

class ocrd_models.ocrd_page.WordType(id=None, language=None, primaryScript=None, secondaryScript=None, readingDirection=None, production=None, custom=None, comments=None, AlternativeImage=None, Coords=None, Glyph=None, TextEquiv=None, TextStyle=None, UserDefined=None, Labels=None, **kwargs_)[source]

Bases: ocrd_models.ocrd_page_generateds.GeneratedsSuper

Overrides primaryLanguage attribute of parent line and/or text region. The primary script used in the word. The secondary script used in the word. The direction in which text within the word should be read (order of characters). Overrides the production attribute of the parent text line and/or text region. For generic use.

add_AlternativeImage(value)[source]
add_Glyph(value)[source]
add_Labels(value)[source]
add_TextEquiv(value)[source]
build(node)[source]
buildAttributes(node, attrs, already_processed)[source]
buildChildren(child_, node, nodeName_, fromsubclass_=False)[source]
export(outfile, level, namespaceprefix_='pc:', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='WordType', pretty_print=True)[source]
exportAttributes(outfile, level, already_processed, namespaceprefix_='pc:', name_='WordType')[source]
exportChildren(outfile, level, namespaceprefix_='pc:', namespacedef_='xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15"', name_='WordType', fromsubclass_=False, pretty_print=True)[source]
static factory(*args_, **kwargs_)[source]
get_AlternativeImage()[source]
get_Coords()[source]
get_Glyph()[source]
get_Labels()[source]
get_TextEquiv()[source]
get_TextStyle()[source]
get_UserDefined()[source]
get_comments()[source]
get_custom()[source]
get_id()[source]
get_language()[source]
get_primaryScript()[source]
get_production()[source]
get_readingDirection()[source]
get_secondaryScript()[source]
hasContent_()[source]
insert_AlternativeImage_at(index, value)[source]
insert_Glyph_at(index, value)[source]
insert_Labels_at(index, value)[source]
insert_TextEquiv_at(index, value)[source]
replace_AlternativeImage_at(index, value)[source]
replace_Glyph_at(index, value)[source]
replace_Labels_at(index, value)[source]
replace_TextEquiv_at(index, value)[source]
set_AlternativeImage(AlternativeImage)[source]
set_Coords(Coords)[source]
set_Glyph(Glyph)[source]
set_Labels(Labels)[source]
set_TextEquiv(TextEquiv)[source]
set_TextStyle(TextStyle)[source]
set_UserDefined(UserDefined)[source]
set_comments(comments)[source]
set_custom(custom)[source]
set_id(id)[source]
set_language(language)[source]
set_primaryScript(primaryScript)[source]
set_production(production)[source]
set_readingDirection(readingDirection)[source]
set_secondaryScript(secondaryScript)[source]
subclass = None
superclass = None
ocrd_models.ocrd_page.to_xml(el)[source]

Serialize pc:PcGts document