-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparser.py
More file actions
44 lines (42 loc) · 2.04 KB
/
parser.py
File metadata and controls
44 lines (42 loc) · 2.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
try:
from .utils import is_json_schema
except ImportError:
from utils import is_json_schema
# Column emitted first for schema-derived tables and as the sole fallback column.
_ID_COLUMN = " id SERIAL PRIMARY KEY"

# JSON-schema scalar types that map to one SQL type regardless of property name.
_SCALAR_SQL_TYPES = {
    "integer": "INTEGER",
    "number": "DECIMAL",
    "boolean": "BOOLEAN",
}

# String-typed properties whose exact name selects a bounded VARCHAR.
_NAMED_STRING_SQL_TYPES = {
    "hash": "VARCHAR(64)",
    "inLanguage": "VARCHAR(10)",
    "author": "VARCHAR(255)",
    "publisher": "VARCHAR(255)",
}


def _sql_type_for(prop_name, prop_def) -> str:
    """Return the SQL type for one schema property, defaulting to ``TEXT``."""
    if not isinstance(prop_def, dict):
        return "TEXT"

    json_type = prop_def.get("type")
    # ``type`` may be missing or a non-string (e.g. a list of types); such
    # values never matched any branch before, so they stay ``TEXT``. The
    # isinstance check also keeps unhashable values out of the dict lookup.
    if not isinstance(json_type, str):
        return "TEXT"

    if json_type != "string":
        return _SCALAR_SQL_TYPES.get(json_type, "TEXT")

    if prop_def.get("format") == "date-time":
        return "TIMESTAMP"
    # Keys loaded from YAML are not guaranteed to be strings (unquoted
    # ``on``/``yes`` or numbers become bool/int), so stringify before
    # calling ``.lower()`` instead of raising AttributeError.
    if "url" in str(prop_name).lower():
        return "VARCHAR(500)"
    return _NAMED_STRING_SQL_TYPES.get(prop_name, "TEXT")


def parse_yaml_to_columns(content: dict) -> list:
    """Translate parsed YAML content into SQL column definition strings.

    Args:
        content: The parsed document. Despite the annotation, three shapes
            are handled: a mapping, a non-empty list whose first element is
            a mapping, and anything else.

    Returns:
        A new list of column definition strings, each with leading
        whitespace:

        * Mapping for which ``is_json_schema`` is true and whose first value
          is a mapping containing ``"properties"`` or ``"allOf"``: an ``id``
          primary-key column followed by one typed column per entry in
          ``"properties"`` (an entry named ``"id"`` is skipped).
        * Any other mapping: one ``TEXT`` column per key.
        * Non-empty list whose first element is a mapping: one ``TEXT``
          column per key of that first element.
        * Everything else: only the ``id`` primary-key column.
    """
    if isinstance(content, dict):
        if is_json_schema(content):
            # Take the first value without building a list; an empty mapping
            # yields None and falls through to the plain-mapping branch
            # rather than raising IndexError.
            schema_def = next(iter(content.values()), None)
            if isinstance(schema_def, dict) and (
                "properties" in schema_def or "allOf" in schema_def
            ):
                # NOTE(review): "allOf" qualifies the schema, but only the
                # top-level "properties" are read; properties nested inside
                # allOf sub-schemas produce no columns. Confirm whether they
                # should be merged.
                properties = schema_def.get("properties", {})
                if not isinstance(properties, dict):
                    # e.g. ``properties:`` left empty in YAML loads as None.
                    properties = {}
                columns = [_ID_COLUMN]
                columns.extend(
                    f" {prop_name} {_sql_type_for(prop_name, prop_def)}"
                    for prop_name, prop_def in properties.items()
                    if prop_name != "id"
                )
                return columns
        return [f" {key} TEXT" for key in content]

    if isinstance(content, list) and content and isinstance(content[0], dict):
        return [f" {key} TEXT" for key in content[0]]

    return [_ID_COLUMN]