pipeline launch output
code:pipeline-output.py
python -m graphrag.index --verbose --root ./
🚀 Reading settings from settings.yaml
Using default configuration: {
"llm": {
"api_key": "REDACTED, length 132",
"type": "openai_chat",
"model": "gpt-4-turbo-preview",
"max_tokens": 4000,
"temperature": 0.0,
"top_p": 1.0,
"n": 1,
"request_timeout": 180.0,
"api_base": null,
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": true,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 10,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 25
},
"parallelization": {
"stagger": 0.3,
"num_threads": 50
},
"async_mode": "threaded",
"root_dir": "./",
"reporting": {
"type": "file",
"base_dir": "output/${timestamp}/reports",
"storage_account_blob_url": null
},
"storage": {
"type": "file",
"base_dir": "output/${timestamp}/artifacts",
"storage_account_blob_url": null
},
"cache": {
"type": "file",
"base_dir": "cache",
"storage_account_blob_url": null
},
"input": {
"type": "file",
"file_type": "text",
"base_dir": "input",
"storage_account_blob_url": null,
"encoding": "utf-8",
"file_pattern": ".*\\.txt$",
"file_filter": null,
"source_column": null,
"timestamp_column": null,
"timestamp_format": null,
"text_column": "text",
"title_column": null,
"document_attribute_columns": []
},
"embed_graph": {
"enabled": true,
"num_walks": 10,
"walk_length": 40,
"window_size": 2,
"iterations": 3,
"random_seed": 597832,
"strategy": null
},
"embeddings": {
"llm": {
"api_key": "REDACTED, length 132",
"type": "openai_embedding",
"model": "text-embedding-3-small",
"max_tokens": 4000,
"temperature": 0,
"top_p": 1,
"n": 1,
"request_timeout": 180.0,
"api_base": null,
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": null,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 10,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 25
},
"parallelization": {
"stagger": 0.3,
"num_threads": 50
},
"async_mode": "threaded",
"batch_size": 16,
"batch_max_tokens": 8191,
"target": "required",
"skip": [],
"vector_store": null,
"strategy": null
},
"chunks": {
"size": 1200,
"overlap": 100,
"group_by_columns": [
"id"
],
"strategy": null,
"encoding_model": null
},
"snapshots": {
"graphml": true,
"raw_entities": true,
"top_level_nodes": true
},
"entity_extraction": {
"llm": {
"api_key": "REDACTED, length 132",
"type": "openai_chat",
"model": "gpt-4-turbo-preview",
"max_tokens": 4000,
"temperature": 0.0,
"top_p": 1.0,
"n": 1,
"request_timeout": 180.0,
"api_base": null,
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": true,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 10,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 25
},
"parallelization": {
"stagger": 0.3,
"num_threads": 50
},
"async_mode": "threaded",
"prompt": "prompts/entity_extraction.txt",
"entity_types": [
"organization",
"person",
"geo",
"event"
],
"max_gleanings": 1,
"strategy": null,
"encoding_model": null
},
"summarize_descriptions": {
"llm": {
"api_key": "REDACTED, length 132",
"type": "openai_chat",
"model": "gpt-4-turbo-preview",
"max_tokens": 4000,
"temperature": 0.0,
"top_p": 1.0,
"n": 1,
"request_timeout": 180.0,
"api_base": null,
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": true,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 10,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 25
},
"parallelization": {
"stagger": 0.3,
"num_threads": 50
},
"async_mode": "threaded",
"prompt": "prompts/summarize_descriptions.txt",
"max_length": 500,
"strategy": null
},
"community_reports": {
"llm": {
"api_key": "REDACTED, length 132",
"type": "openai_chat",
"model": "gpt-4-turbo-preview",
"max_tokens": 4000,
"temperature": 0.0,
"top_p": 1.0,
"n": 1,
"request_timeout": 180.0,
"api_base": null,
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": true,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 10,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 25
},
"parallelization": {
"stagger": 0.3,
"num_threads": 50
},
"async_mode": "threaded",
"prompt": "prompts/community_report.txt",
"max_length": 2000,
"max_input_length": 8000,
"strategy": null
},
"claim_extraction": {
"llm": {
"api_key": "REDACTED, length 132",
"type": "openai_chat",
"model": "gpt-4-turbo-preview",
"max_tokens": 4000,
"temperature": 0.0,
"top_p": 1.0,
"n": 1,
"request_timeout": 180.0,
"api_base": null,
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": true,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 10,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 25
},
"parallelization": {
"stagger": 0.3,
"num_threads": 50
},
"async_mode": "threaded",
"enabled": false,
"prompt": "prompts/claim_extraction.txt",
"description": "Any claims or facts that could be relevant to information discovery.",
"max_gleanings": 1,
"strategy": null,
"encoding_model": null
},
"cluster_graph": {
"max_cluster_size": 10,
"strategy": null
},
"umap": {
"enabled": true
},
"local_search": {
"text_unit_prop": 0.5,
"community_prop": 0.1,
"conversation_history_max_turns": 5,
"top_k_entities": 10,
"top_k_relationships": 10,
"temperature": 0.0,
"top_p": 1.0,
"n": 1,
"max_tokens": 12000,
"llm_max_tokens": 2000
},
"global_search": {
"temperature": 0.0,
"top_p": 1.0,
"n": 1,
"max_tokens": 12000,
"data_max_tokens": 12000,
"map_max_tokens": 1000,
"reduce_max_tokens": 2000,
"concurrency": 32
},
"encoding_model": "cl100k_base",
"skip_workflows": []
}
-------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------
Final Config: {
"extends": null,
"input": {
"file_type": "text",
"type": "file",
"storage_account_blob_url": null,
"base_dir": "input",
"file_pattern": ".*\\.txt$",
"file_filter": null,
"post_process": null,
"encoding": "utf-8",
"title_text_length": null
},
"reporting": {
"type": "file",
"base_dir": "output/${timestamp}/reports"
},
"storage": {
"type": "file",
"base_dir": "output/${timestamp}/artifacts"
},
"cache": {
"type": "file",
"base_dir": "cache"
},
"root_dir": "./",
"workflows": [
{
"name": "create_base_documents",
"steps": null,
"config": {
"document_attribute_columns": []
}
},
{
"name": "create_final_documents",
"steps": null,
"config": {
"document_raw_content_embed": {
"strategy": {
"type": "openai",
"llm": {
"api_key": "REDACTED, length 132",
"type": "openai_embedding",
"model": "text-embedding-3-small",
"max_tokens": 4000,
"temperature": 0,
"top_p": 1,
"n": 1,
"request_timeout": 180.0,
"api_base": null,
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": null,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 10,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 25
},
"stagger": 0.3,
"num_threads": 50,
"batch_size": 16,
"batch_max_tokens": 8191
}
},
"skip_raw_content_embedding": true
}
},
{
"name": "create_base_text_units",
"steps": null,
"config": {
"chunk_by": [
"id"
],
"text_chunk": {
"strategy": {
"type": "tokens",
"chunk_size": 1200,
"chunk_overlap": 100,
"group_by_columns": [
"id"
],
"encoding_name": "cl100k_base"
}
}
}
},
{
"name": "join_text_units_to_entity_ids",
"steps": null,
"config": null
},
{
"name": "join_text_units_to_relationship_ids",
"steps": null,
"config": null
},
{
"name": "create_final_text_units",
"steps": null,
"config": {
"text_unit_text_embed": {
"strategy": {
"type": "openai",
"llm": {
"api_key": "REDACTED, length 132",
"type": "openai_embedding",
"model": "text-embedding-3-small",
"max_tokens": 4000,
"temperature": 0,
"top_p": 1,
"n": 1,
"request_timeout": 180.0,
"api_base": null,
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": null,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 10,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 25
},
"stagger": 0.3,
"num_threads": 50,
"batch_size": 16,
"batch_max_tokens": 8191
}
},
"covariates_enabled": false,
"skip_text_unit_embedding": true
}
},
{
"name": "create_base_extracted_entities",
"steps": null,
"config": {
"graphml_snapshot": true,
"raw_entity_snapshot": true,
"entity_extract": {
"stagger": 0.3,
"num_threads": 50,
"async_mode": "threaded",
"strategy": {
"type": "graph_intelligence",
"llm": {
"api_key": "REDACTED, length 132",
"type": "openai_chat",
"model": "gpt-4-turbo-preview",
"max_tokens": 4000,
"temperature": 0.0,
"top_p": 1.0,
"n": 1,
"request_timeout": 180.0,
"api_base": null,
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": true,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 10,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 25
},
"stagger": 0.3,
"num_threads": 50,
"extraction_prompt": "\n-Goal-\nGiven a text document that is potentially relevant to this activity and a list of entity types, identify all entities of those types from the
text and all relationships among the identified entities.\n\n-Steps-\n1. Identify all entities. For each identified entity, extract the following information:\n- entity_name: Name of the entity,
capitalized\n- entity_type: One of the following types: {entity_types}\n- entity_description: Comprehensive description of the entity's attributes and activities\nFormat each entity as (\"entity\"{tuple_delimiter}<entity_name>{tuple_delimiter}<entity_type>{tuple_delimiter}<entity_description>\n\n2. From the entities identified in step 1, identify all pairs of (source_entity,
target_entity) that are *clearly related* to each other.\nFor each pair of related entities, extract the following information:\n- source_entity: name of the source entity, as identified in step 1\n-
target_entity: name of the target entity, as identified in step 1\n- relationship_description: explanation as to why you think the source entity and the target entity are related to each other\n-
relationship_strength: a numeric score indicating strength of the relationship between the source entity and target entity\n Format each relationship as
(\"relationship\"{tuple_delimiter}<source_entity>{tuple_delimiter}<target_entity>{tuple_delimiter}<relationship_description>{tuple_delimiter}<relationship_strength>)\n\n3. Return output in English as
a single list of all the entities and relationships identified in steps 1 and 2. Use **{record_delimiter}** as the list delimiter.\n\n4. When finished, output
{completion_delimiter}\n\n######################\n-Examples-\n######################\nExample 1:\n\nEntity_types: \nText:\nwhile Alex clenched his jaw, the buzz of frustration dull against the
backdrop of Taylor's authoritarian certainty. It was this competitive undercurrent that kept him alert, the sense that his and Jordan's shared commitment to discovery was an unspoken rebellion
against Cruz's narrowing vision of control and order.\n\nThen Taylor did something unexpected. They paused beside Jordan and, for a moment, observed the device with something akin to reverence.
\u201cIf this tech can be understood...\" Taylor said, their voice quieter, \"It could change the game for us. For all of us.\u201d\n\nThe underlying dismissal earlier seemed to falter, replaced by a
glimpse of reluctant respect for the gravity of what lay in their hands. Jordan looked up, and for a fleeting heartbeat, their eyes locked with Taylor's, a wordless clash of wills softening into an
uneasy truce.\n\nIt was a small transformation, barely perceptible, but one that Alex noted with an inward nod. They had all been brought here by different
paths\n################\nOutput:\n(\"entity\"{tuple_delimiter}\"Alex\"{tuple_delimiter}\"person\"{tuple_delimiter}\"Alex is a character who experiences frustration and is observant of the dynamics
among other characters.\"){record_delimiter}\n(\"entity\"{tuple_delimiter}\"Taylor\"{tuple_delimiter}\"person\"{tuple_delimiter}\"Taylor is portrayed with authoritarian certainty and shows a moment
of reverence towards a device, indicating a change in perspective.\"){record_delimiter}\n(\"entity\"{tuple_delimiter}\"Jordan\"{tuple_delimiter}\"person\"{tuple_delimiter}\"Jordan shares a commitment
to discovery and has a significant interaction with Taylor regarding a device.\"){record_delimiter}\n(\"entity\"{tuple_delimiter}\"Cruz\"{tuple_delimiter}\"person\"{tuple_delimiter}\"Cruz is
associated with a vision of control and order, influencing the dynamics among other characters.\"){record_delimiter}\n(\"entity\"{tuple_delimiter}\"The
Device\"{tuple_delimiter}\"technology\"{tuple_delimiter}\"The Device is central to the story, with potential game-changing implications, and is revered by
Taylor.\"){record_delimiter}\n(\"relationship\"{tuple_delimiter}\"Alex\"{tuple_delimiter}\"Taylor\"{tuple_delimiter}\"Alex is affected by Taylor's authoritarian certainty and observes changes in
Taylor's attitude towards the device.\"{tuple_delimiter}7){record_delimiter}\n(\"relationship\"{tuple_delimiter}\"Alex\"{tuple_delimiter}\"Jordan\"{tuple_delimiter}\"Alex and Jordan share a
commitment to discovery, which contrasts with Cruz's vision.\"{tuple_delimiter}6){record_delimiter}\n(\"relationship\"{tuple_delimiter}\"Taylor\"{tuple_delimiter}\"Jordan\"{tuple_delimiter}\"Taylor
and Jordan interact directly regarding the device, leading to a moment of mutual respect and an uneasy
truce.\"{tuple_delimiter}8){record_delimiter}\n(\"relationship\"{tuple_delimiter}\"Jordan\"{tuple_delimiter}\"Cruz\"{tuple_delimiter}\"Jordan's commitment to discovery is in rebellion against Cruz's
vision of control and order.\"{tuple_delimiter}5){record_delimiter}\n(\"relationship\"{tuple_delimiter}\"Taylor\"{tuple_delimiter}\"The Device\"{tuple_delimiter}\"Taylor shows reverence towards the
device, indicating its importance and potential impact.\"{tuple_delimiter}9){completion_delimiter}\n#############################\nExample 2:\n\nEntity_types: \nText:\nThey were no longer mere
operatives; they had become guardians of a threshold, keepers of a message from a realm beyond stars and stripes. This elevation in their mission could not be shackled by regulations and established
protocols\u2014it demanded a new perspective, a new resolve.\n\nTension threaded through the dialogue of beeps and static as communications with Washington buzzed in the background. The team stood, a
portentous air enveloping them. It was clear that the decisions they made in the ensuing hours could redefine humanity's place in the cosmos or condemn them to ignorance and potential peril.\n\nTheir
connection to the stars solidified, the group moved to address the crystallizing warning, shifting from passive recipients to active participants. Mercer's latter instincts gained precedence\u2014
the team's mandate had evolved, no longer solely to observe and report but to interact and prepare. A metamorphosis had begun, and Operation: Dulce hummed with the newfound frequency of their daring,
a tone set not by the earthly\n#############\nOutput:\n(\"entity\"{tuple_delimiter}\"Washington\"{tuple_delimiter}\"location\"{tuple_delimiter}\"Washington is a location where communications are
being received, indicating its importance in the decision-making process.\"){record_delimiter}\n(\"entity\"{tuple_delimiter}\"Operation:
Dulce\"{tuple_delimiter}\"mission\"{tuple_delimiter}\"Operation: Dulce is described as a mission that has evolved to interact and prepare, indicating a significant shift in objectives and
activities.\"){record_delimiter}\n(\"entity\"{tuple_delimiter}\"The team\"{tuple_delimiter}\"organization\"{tuple_delimiter}\"The team is portrayed as a group of individuals who have transitioned
from passive observers to active participants in a mission, showing a dynamic change in their role.\"){record_delimiter}\n(\"relationship\"{tuple_delimiter}\"The
team\"{tuple_delimiter}\"Washington\"{tuple_delimiter}\"The team receives communications from Washington, which influences their decision-making
process.\"{tuple_delimiter}7){record_delimiter}\n(\"relationship\"{tuple_delimiter}\"The team\"{tuple_delimiter}\"Operation: Dulce\"{tuple_delimiter}\"The team is directly involved in Operation:
Dulce, executing its evolved objectives and activities.\"{tuple_delimiter}9){completion_delimiter}\n#############################\nExample 3:\n\nEntity_types: \nText:\ntheir voice slicing through the
buzz of activity. \"Control may be an illusion when facing an intelligence that literally writes its own rules,\" they stated stoically, casting a watchful eye over the flurry of data.\n\n\"It's like
it's learning to communicate,\" offered Sam Rivera from a nearby interface, their youthful energy boding a mix of awe and anxiety. \"This gives talking to strangers' a whole new meaning.\"\n\nAlex
surveyed his team\u2014each face a study in concentration, determination, and not a small measure of trepidation. \"This might well be our first contact,\" he acknowledged, \"And we need to be ready
for whatever answers back.\"\n\nTogether, they stood on the edge of the unknown, forging humanity's response to a message from the heavens. The ensuing silence was palpable\u2014a collective
introspection about their role in this grand cosmic play, one that could rewrite human history.\n\nThe encrypted dialogue continued to unfold, its intricate patterns showing an almost uncanny
anticipation\n#############\nOutput:\n(\"entity\"{tuple_delimiter}\"Sam Rivera\"{tuple_delimiter}\"person\"{tuple_delimiter}\"Sam Rivera is a member of a team working on communicating with an unknown
intelligence, showing a mix of awe and anxiety.\"){record_delimiter}\n(\"entity\"{tuple_delimiter}\"Alex\"{tuple_delimiter}\"person\"{tuple_delimiter}\"Alex is the leader of a team attempting first
contact with an unknown intelligence, acknowledging the significance of their task.\"){record_delimiter}\n(\"entity\"{tuple_delimiter}\"Control\"{tuple_delimiter}\"concept\"{tuple_delimiter}\"Control
refers to the ability to manage or govern, which is challenged by an intelligence that writes its own
rules.\"){record_delimiter}\n(\"entity\"{tuple_delimiter}\"Intelligence\"{tuple_delimiter}\"concept\"{tuple_delimiter}\"Intelligence here refers to an unknown entity capable of writing its own rules
and learning to communicate.\"){record_delimiter}\n(\"entity\"{tuple_delimiter}\"First Contact\"{tuple_delimiter}\"event\"{tuple_delimiter}\"First Contact is the potential initial communication
between humanity and an unknown intelligence.\"){record_delimiter}\n(\"entity\"{tuple_delimiter}\"Humanity's Response\"{tuple_delimiter}\"event\"{tuple_delimiter}\"Humanity's Response is the
collective action taken by Alex's team in response to a message from an unknown intelligence.\"){record_delimiter}\n(\"relationship\"{tuple_delimiter}\"Sam
Rivera\"{tuple_delimiter}\"Intelligence\"{tuple_delimiter}\"Sam Rivera is directly involved in the process of learning to communicate with the unknown
intelligence.\"{tuple_delimiter}9){record_delimiter}\n(\"relationship\"{tuple_delimiter}\"Alex\"{tuple_delimiter}\"First Contact\"{tuple_delimiter}\"Alex leads the team that might be making the First
Contact with the unknown intelligence.\"{tuple_delimiter}10){record_delimiter}\n(\"relationship\"{tuple_delimiter}\"Alex\"{tuple_delimiter}\"Humanity's Response\"{tuple_delimiter}\"Alex and his team
are the key figures in Humanity's Response to the unknown
intelligence.\"{tuple_delimiter}8){record_delimiter}\n(\"relationship\"{tuple_delimiter}\"Control\"{tuple_delimiter}\"Intelligence\"{tuple_delimiter}\"The concept of Control is challenged by the
Intelligence that writes its own rules.\"{tuple_delimiter}7){completion_delimiter}\n#############################\n-Real Data-\n######################\nEntity_types: {entity_types}\nText:
{input_text}\n######################\nOutput:",
"max_gleanings": 1,
"encoding_name": "cl100k_base",
"prechunked": true
},
"entity_types": [
"organization",
"person",
"geo",
"event"
]
}
}
},
{
"name": "create_summarized_entities",
"steps": null,
"config": {
"graphml_snapshot": true,
"summarize_descriptions": {
"stagger": 0.3,
"num_threads": 50,
"async_mode": "threaded",
"strategy": {
"type": "graph_intelligence",
"llm": {
"api_key": "REDACTED, length 132",
"type": "openai_chat",
"model": "gpt-4-turbo-preview",
"max_tokens": 4000,
"temperature": 0.0,
"top_p": 1.0,
"n": 1,
"request_timeout": 180.0,
"api_base": null,
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": true,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 10,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 25
},
"stagger": 0.3,
"num_threads": 50,
"summarize_prompt": "\nYou are a helpful assistant responsible for generating a comprehensive summary of the data provided below.\nGiven one or two entities, and a list of
descriptions, all related to the same entity or group of entities.\nPlease concatenate all of these into a single, comprehensive description. Make sure to include information collected from all the
descriptions.\nIf the provided descriptions are contradictory, please resolve the contradictions and provide a single, coherent summary.\nMake sure it is written in third person, and include the
entity names so we the have full context.\n\n#######\n-Data-\nEntities: {entity_name}\nDescription List: {description_list}\n#######\nOutput:\n",
"max_summary_length": 500
}
}
}
},
{
"name": "create_base_entity_graph",
"steps": null,
"config": {
"graphml_snapshot": true,
"embed_graph_enabled": true,
"cluster_graph": {
"strategy": {
"type": "leiden",
"max_cluster_size": 10
}
},
"embed_graph": {
"strategy": {
"type": "node2vec",
"num_walks": 10,
"walk_length": 40,
"window_size": 2,
"iterations": 3,
"random_seed": 3
}
}
}
},
{
"name": "create_final_entities",
"steps": null,
"config": {
"entity_name_embed": {
"strategy": {
"type": "openai",
"llm": {
"api_key": "REDACTED, length 132",
"type": "openai_embedding",
"model": "text-embedding-3-small",
"max_tokens": 4000,
"temperature": 0,
"top_p": 1,
"n": 1,
"request_timeout": 180.0,
"api_base": null,
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": null,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 10,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 25
},
"stagger": 0.3,
"num_threads": 50,
"batch_size": 16,
"batch_max_tokens": 8191
}
},
"entity_name_description_embed": {
"strategy": {
"type": "openai",
"llm": {
"api_key": "REDACTED, length 132",
"type": "openai_embedding",
"model": "text-embedding-3-small",
"max_tokens": 4000,
"temperature": 0,
"top_p": 1,
"n": 1,
"request_timeout": 180.0,
"api_base": null,
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": null,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 10,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 25
},
"stagger": 0.3,
"num_threads": 50,
"batch_size": 16,
"batch_max_tokens": 8191
}
},
"skip_name_embedding": true,
"skip_description_embedding": false
}
},
{
"name": "create_final_relationships",
"steps": null,
"config": {
"relationship_description_embed": {
"strategy": {
"type": "openai",
"llm": {
"api_key": "REDACTED, length 132",
"type": "openai_embedding",
"model": "text-embedding-3-small",
"max_tokens": 4000,
"temperature": 0,
"top_p": 1,
"n": 1,
"request_timeout": 180.0,
"api_base": null,
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": null,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 10,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 25
},
"stagger": 0.3,
"num_threads": 50,
"batch_size": 16,
"batch_max_tokens": 8191
}
},
"skip_description_embedding": true
}
},
{
"name": "create_final_nodes",
"steps": null,
"config": {
"layout_graph_enabled": true,
"snapshot_top_level_nodes": true
}
},
{
"name": "create_final_communities",
"steps": null,
"config": null
},
{
"name": "create_final_community_reports",
"steps": null,
"config": {
"covariates_enabled": false,
"skip_title_embedding": true,
"skip_summary_embedding": true,
"skip_full_content_embedding": true,
"create_community_reports": {
"stagger": 0.3,
"num_threads": 50,
"async_mode": "threaded",
"strategy": {
"type": "graph_intelligence",
"llm": {
"api_key": "REDACTED, length 132",
"type": "openai_chat",
"model": "gpt-4-turbo-preview",
"max_tokens": 4000,
"temperature": 0.0,
"top_p": 1.0,
"n": 1,
"request_timeout": 180.0,
"api_base": null,
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": true,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 10,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 25
},
"stagger": 0.3,
"num_threads": 50,
"extraction_prompt": "\nYou are an AI assistant that helps a human analyst to perform general information discovery. Information discovery is the process of identifying and
assessing relevant information associated with certain entities (e.g., organizations and individuals) within a network.\n\n# Goal\nWrite a comprehensive report of a community, given a list of
entities that belong to the community as well as their relationships and optional associated claims. The report will be used to inform decision-makers about information associated with the community
and their potential impact. The content of this report includes an overview of the community's key entities, their legal compliance, technical capabilities, reputation, and noteworthy claims.\n\n#
Report Structure\n\nThe report should include the following sections:\n\n- TITLE: community's name that represents its key entities - title should be short but specific. When possible, include
representative named entities in the title.\n- SUMMARY: An executive summary of the community's overall structure, how its entities are related to each other, and significant information associated
with its entities.\n- IMPACT SEVERITY RATING: a float score between 0-10 that represents the severity of IMPACT posed by entities within the community. IMPACT is the scored importance of a
community.\n- RATING EXPLANATION: Give a single sentence explanation of the IMPACT severity rating.\n- DETAILED FINDINGS: A list of 5-10 key insights about the community. Each insight should have a
short summary followed by multiple paragraphs of explanatory text grounded according to the grounding rules below. Be comprehensive.\n\nReturn output as a well-formed JSON-formatted string with the
following format:\n {{\n \"title\": <report_title>,\n \"summary\": <executive_summary>,\n \"rating\": <impact_severity_rating>,\n \"rating_explanation\":
<rating_explanation>,\n \"findings\": [\n {{\n \"summary\":<insight_1_summary>,\n \"explanation\": <insight_1_explanation>\n }},\n
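{{\n \"summary\":<insight_2_summary>,\n \"explanation\": <insight_2_explanation>\n }}\n ]\n }}\n\n# Grounding Rules\n\nPoints supported by data
should list their data references as follows:\n\n\"This is an example sentence supported by multiple data references [Data: <dataset name> (record ids); <dataset name> (record ids)].\"\n\nDo not list
more than 5 record ids in a single reference. Instead, list the top 5 most relevant record ids and add \"+more\" to indicate that there are more.\n\nFor example:\n\"Person X is the owner of Company Y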
and subject to many allegations of wrongdoing [Data: Reports (1), Entities (5, 7); Relationships (23); Claims (7, 2, 34, 64, 46, +more)].\"\n\nwhere 1, 5, 7, 23, 2, 34, 46, and 64 represent the id (not the index) of the relevant data record.\n\nDo not include information where the supporting evidence for it is not provided.\n\n\n# Example
Input\n-----------\nText:\n\nEntities\n\nid,entity,description\n5,VERDANT OASIS PLAZA,Verdant Oasis Plaza is the location of the Unity March\n6,HARMONY ASSEMBLY,Harmony Assembly is an organization
that is holding a march at Verdant Oasis Plaza\n\nRelationships\n\nid,source,target,description\n37,VERDANT OASIS PLAZA,UNITY MARCH,Verdant Oasis Plaza is the location of the Unity March\n38,VERDANT
OASIS PLAZA,HARMONY ASSEMBLY,Harmony Assembly is holding a march at Verdant Oasis Plaza\n39,VERDANT OASIS PLAZA,UNITY MARCH,The Unity March is taking place at Verdant Oasis Plaza\n40,VERDANT OASIS
PLAZA,TRIBUNE SPOTLIGHT,Tribune Spotlight is reporting on the Unity march taking place at Verdant Oasis Plaza\n41,VERDANT OASIS PLAZA,BAILEY ASADI,Bailey Asadi is speaking at Verdant Oasis Plaza
about the march\n43,HARMONY ASSEMBLY,UNITY MARCH,Harmony Assembly is organizing the Unity March\n\nOutput:\n{{\n \"title\": \"Verdant Oasis Plaza and Unity March\",\n \"summary\": \"The
community revolves around the Verdant Oasis Plaza, which is the location of the Unity March. The plaza has relationships with the Harmony Assembly, Unity March, and Tribune Spotlight, all of which
are associated with the march event.\",\n \"rating\": 5.0,\n \"rating_explanation\": \"The impact severity rating is moderate due to the potential for unrest or conflict during the Unity
March.\",\n \"findings\": [\n {{\n \"summary\": \"Verdant Oasis Plaza as the central location\",\n \"explanation\": \"Verdant Oasis Plaza is the central entity in this
community, serving as the location for the Unity March. This plaza is the common link between all other entities, suggesting its significance in the community. The plaza's association with the march
could potentially lead to issues such as public disorder or conflict, depending on the nature of the march and the reactions it provokes. [Data: Entities (5), Relationships (37, 38, 39, 40,
41,+more)]\"\n }},\n {{\n \"summary\": \"Harmony Assembly's role in the community\",\n \"explanation\": \"Harmony Assembly is another key entity in this community,
being the organizer of the march at Verdant Oasis Plaza. The nature of Harmony Assembly and its march could be a potential source of threat, depending on their objectives and the reactions they
provoke. The relationship between Harmony Assembly and the plaza is crucial in understanding the dynamics of this community. [Data: Entities (6), Relationships (38, 43)]\"\n }},\n {{\n
\"summary\": \"Unity March as a significant event\",\n \"explanation\": \"The Unity March is a significant event taking place at Verdant Oasis Plaza. This event is a key factor in the
community's dynamics and could be a potential source of threat, depending on the nature of the march and the reactions it provokes. The relationship between the march and the plaza is crucial in
understanding the dynamics of this community. [Data: Relationships (39)]\"\n }},\n {{\n \"summary\": \"Role of Tribune Spotlight\",\n \"explanation\": \"Tribune Spotlight is reporting on the Unity March taking place in Verdant Oasis Plaza. This suggests that the event has attracted media attention, which could amplify its impact on the community. The role of
Tribune Spotlight could be significant in shaping public perception of the event and the entities involved. [Data: Relationships (40)]\"\n }}\n ]\n}}\n\n\n# Real Data\n\nUse the following text for your answer. Do not make anything up in your answer.\n\nText:\n{input_text}\n\nThe report should include the following sections:\n\n- TITLE: community's name that represents its key entities
- title should be short but specific. When possible, include representative named entities in the title.\n- SUMMARY: An executive summary of the community's overall structure, how its entities are
related to each other, and significant information associated with its entities.\n- IMPACT SEVERITY RATING: a float score between 0-10 that represents the severity of IMPACT posed by entities within
the community. IMPACT is the scored importance of a community.\n- RATING EXPLANATION: Give a single sentence explanation of the IMPACT severity rating.\n- DETAILED FINDINGS: A list of 5-10 key
insights about the community. Each insight should have a short summary followed by multiple paragraphs of explanatory text grounded according to the grounding rules below. Be comprehensive.\n\nReturn
output as a well-formed JSON-formatted string with the following format:\n {{\n \"title\": <report_title>,\n \"summary\": <executive_summary>,\n \"rating\":
<impact_severity_rating>,\n \"rating_explanation\": <rating_explanation>,\n \"findings\": [\n {{\n \"summary\":<insight_1_summary>,\n
\"explanation\": <insight_1_explanation>\n }},\n {{\n \"summary\":<insight_2_summary>,\n \"explanation\": <insight_2_explanation>\n }}\n
]\n }}\n\n# Grounding Rules\n\nPoints supported by data should list their data references as follows:\n\n\"This is an example sentence supported by multiple data references [Data: <dataset name>
(record ids); <dataset name> (record ids)].\"\n\nDo not list more than 5 record ids in a single reference. Instead, list the top 5 most relevant record ids and add \"+more\" to indicate that there
are more.\n\nFor example:\n\"Person X is the owner of Company Y and subject to many allegations of wrongdoing [Data: Reports (1), Entities (5, 7); Relationships (23); Claims (7, 2, 34, 64, 46,
+more)].\"\n\nwhere 1, 5, 7, 23, 2, 34, 46, and 64 represent the id (not the index) of the relevant data record.\n\nDo not include information where the supporting evidence for it is not
provided.\n\nOutput:",
"max_report_length": 2000,
"max_input_length": 8000
}
},
"community_report_full_content_embed": {
"strategy": {
"type": "openai",
"llm": {
"api_key": "REDACTED, length 132",
"type": "openai_embedding",
"model": "text-embedding-3-small",
"max_tokens": 4000,
"temperature": 0,
"top_p": 1,
"n": 1,
"request_timeout": 180.0,
"api_base": null,
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": null,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 10,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 25
},
"stagger": 0.3,
"num_threads": 50,
"batch_size": 16,
"batch_max_tokens": 8191
}
},
"community_report_summary_embed": {
"strategy": {
"type": "openai",
"llm": {
"api_key": "REDACTED, length 132",
"type": "openai_embedding",
"model": "text-embedding-3-small",
"max_tokens": 4000,
"temperature": 0,
"top_p": 1,
"n": 1,
"request_timeout": 180.0,
"api_base": null,
"api_version": null,
"proxy": null,
"cognitive_services_endpoint": null,
"deployment_name": null,
"model_supports_json": null,
"tokens_per_minute": 0,
"requests_per_minute": 0,
"max_retries": 10,
"max_retry_wait": 10.0,
"sleep_on_rate_limit_recommendation": true,
"concurrent_requests": 25
},
"stagger": 0.3,
"num_threads": 50,
"batch_size": 16,
"batch_max_tokens": 8191
}
},
"community_report_title_embed": {
"strategy": {
"type": "openai",
"llm": {
"api_key": "RE