roboto.domain.topics.parquet#

Submodules#

Package Contents#

class roboto.domain.topics.parquet.ParquetParser(source, min_required_row_group_size=100000, small_row_group_count_threshold=32)#
Parameters:
  • source (pathlib.Path)

  • min_required_row_group_size (int)

  • small_row_group_count_threshold (int)

property column_count: int#
Return type:

int

extract_timestamp_info(timestamp_column_name=None, timestamp_unit=None)#
Parameters:
  • timestamp_column_name (default: None)

  • timestamp_unit (default: None)

Return type:

roboto.domain.topics.parquet.timestamp.TimestampInfo

property fields: Generator[pyarrow.Field, None, None]#
Return type:

Generator[pyarrow.Field, None, None]

find_timestamp_field_by_type()#
Return type:

pyarrow.Field

get_data_for_column(column_name)#
Parameters:

column_name (str)

Return type:

pyarrow.Table

get_timestamp_field_by_name(column_name)#
Parameters:

column_name (str)

Return type:

pyarrow.Field

static is_parquet_file(path)#
Parameters:

path (pathlib.Path)

Return type:

bool

requires_rewrite(timestamp)#
Parameters:

timestamp (roboto.domain.topics.parquet.timestamp.TimestampInfo)

Return type:

bool

rewrite(outfile, timestamp, target_row_group_size_bytes=100 * 1000 * 1000)#
Parameters:
  • outfile

  • timestamp

  • target_row_group_size_bytes (default: 100 * 1000 * 1000)

Return type:

None

property row_count: int#
Return type:

int

property row_group_count: int#
Return type:

int

property row_group_size: int#
Return type:

int

class roboto.domain.topics.parquet.ParquetTopicReader(roboto_client)#

Bases: roboto.domain.topics.topic_reader.TopicReader

Private interface for retrieving topic data stored in Parquet files.

Note

This is not intended as a public API. To access topic data, prefer the get_data or get_data_as_df methods on Topic, MessagePath, or Event.

Parameters:

roboto_client (roboto.http.RobotoClient)

static accepts(message_paths_to_representations)#
Parameters:

message_paths_to_representations (collections.abc.Iterable[roboto.domain.topics.operations.MessagePathRepresentationMapping])

Return type:

bool

get_data(message_paths_to_representations, log_time_attr_name, log_time_unit=TimeUnit.Nanoseconds, start_time=None, end_time=None, timestamp_message_path_representation_mapping=None)#
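Parameters:
  • message_paths_to_representations

  • log_time_attr_name

  • log_time_unit (default: TimeUnit.Nanoseconds)

  • start_time (default: None)

  • end_time (default: None)

  • timestamp_message_path_representation_mapping (default: None)
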
Return type:

collections.abc.Generator[dict[str, Any], None, None]

get_data_as_df(message_paths_to_representations, log_time_attr_name, log_time_unit=TimeUnit.Nanoseconds, start_time=None, end_time=None, timestamp_message_path_representation_mapping=None)#
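Parameters:
  • message_paths_to_representations

  • log_time_attr_name

  • log_time_unit (default: TimeUnit.Nanoseconds)

  • start_time (default: None)

  • end_time (default: None)

  • timestamp_message_path_representation_mapping (default: None)
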
Return type:

pandas.DataFrame

roboto.domain.topics.parquet.field_to_message_path_request(field, parquet_file, timestamp)#
Parameters:
  • field

  • parquet_file

  • timestamp

Return type:

roboto.domain.topics.operations.AddMessagePathRequest

roboto.domain.topics.parquet.make_topic_filename_safe(name, replacement_char='_')#
Parameters:
  • name (str)

  • replacement_char (str)

Return type:

str

roboto.domain.topics.parquet.upload_representation_file(file_path, association, caller_org_id=None, roboto_client=None)#
Parameters:
  • file_path

  • association

  • caller_org_id (default: None)

  • roboto_client (default: None)

Return type:

str