ZGLanguage 解析SQL数据血缘 之 Python提取SQL表级血缘树信息

### 根据之前文章的说明,使用 ZGLanguage 将以下SQL解析输出扁平化、可标注的编程语言解析结果 :

# SQL代码 :

CREATE TABLE TMP_O_SS_CL_1

AS

    SELECT CL.OBJECTNO

         , PP.PAYDATE

      FROM NYBDP.O_SS_CL_LOAN_ACCT_STATIS CL

 LEFT JOIN (SELECT AL.PAYDATE

                 , BC.BUSS_NO

              FROM O_CWWS_ACCT_LOAN AL

        INNER JOIN NYBDP.O_CWWS_BUSINESS_DUEBILL BD

                ON 1=1

         LEFT JOIN O_CWWS_BUSINESS_CONTRACT BC

                ON 1=1

           ) PP

        ON 1=1

         ;


# 解析结果 :

1 P 2 1 __CREATE_TABLE_SELECT__ 1 x1 CREATE

2 P 2 1 __CREATE_TABLE_SELECT__ 1 x2 TABLE

3 P 2 1 __CREATE_TABLE_SELECT__ 1 目标表 TMP_O_SS_CL_1

4 P 3 1 __CREATE_TABLE_SELECT__ 1 x3 AS

5 P 4 bbbb 2 __SELECT__ 1 x1 SELECT

6 P 4 bbbb 3 __OTH_COL__ 1 x1 CL

7 P 4 bbbb 3 __OTH_COL__ 1 x2 .

8 P 4 bbbb 3 __OTH_COL__ 1 x3 OBJECTNO

9 P 3 __OTH_COL__ 6

10 P 5 bbbbbbbbb 2 __SELECT__ 1 x3 ,

11 P 5 bbbbbbbbb 4 __OTH_COL__ 1 x1 PP

12 P 5 bbbbbbbbb 4 __OTH_COL__ 1 x2 .

13 P 5 bbbbbbbbb 4 __OTH_COL__ 1 x3 PAYDATE

14 P 4 __OTH_COL__ 11

15 P 6 bbbbbb 2 __SELECT__ 1 x4 FROM

16 P 6 bbbbbb 5 __TABLE_NAME__ 1 schema NYBDP

17 P 6 bbbbbb 5 __TABLE_NAME__ 1 pp .

18 P 6 bbbbbb 5 __TABLE_NAME__ 1 源表_2 O_SS_CL_LOAN_ACCT_STATIS

19 P 6 bbbbbb 5 __TABLE_NAME__ 1 源表别名 CL

20 P 5 __TABLE_NAME__ 16

21 P 7 b 6 __JOIN_TABLE__ 1 x5 LEFT

22 P 7 b 6 __JOIN_TABLE__ 1 x7 JOIN

23 P 7 b 7 __SUB_SELECT__ 1 x1 (

24 P 7 b 8 __SELECT__ 1 x1 SELECT

25 P 7 b 9 __OTH_COL__ 1 x1 AL

26 P 7 b 9 __OTH_COL__ 1 x2 .

27 P 7 b 9 __OTH_COL__ 1 x3 PAYDATE

28 P 9 __OTH_COL__ 25

29 P 8 bbbbbbbbbbbbbbbbb 8 __SELECT__ 1 x3 ,

30 P 8 bbbbbbbbbbbbbbbbb 10 __OTH_COL__ 1 x1 BC

31 P 8 bbbbbbbbbbbbbbbbb 10 __OTH_COL__ 1 x2 .

32 P 8 bbbbbbbbbbbbbbbbb 10 __OTH_COL__ 1 x3 BUSS_NO

33 P 10 __OTH_COL__ 30

34 P 9 bbbbbbbbbbbbbb 8 __SELECT__ 1 x4 FROM

35 P 9 bbbbbbbbbbbbbb 11 __TABLE_NAME__ 1 源表 O_CWWS_ACCT_LOAN

36 P 9 bbbbbbbbbbbbbb 11 __TABLE_NAME__ 1 源表别名 AL

37 P 11 __TABLE_NAME__ 35

38 P 10 bbbbbbbb 12 __JOIN_TABLE__ 1 x1 INNER

39 P 10 bbbbbbbb 12 __JOIN_TABLE__ 1 x2 JOIN

40 P 10 bbbbbbbb 13 __TABLE_NAME__ 1 schema NYBDP

41 P 10 bbbbbbbb 13 __TABLE_NAME__ 1 pp .

42 P 10 bbbbbbbb 13 __TABLE_NAME__ 1 源表_2 O_CWWS_BUSINESS_DUEBILL

43 P 10 bbbbbbbb 13 __TABLE_NAME__ 1 源表别名 BD

44 P 13 __TABLE_NAME__ 40

45 P 11 bbbbbbbbbbbbbbbb 12 __JOIN_TABLE__ 1 x22 ON

46 P 11 bbbbbbbbbbbbbbbb 12 __JOIN_TABLE__ 1 x23 1=1

47 P 12 __JOIN_TABLE__ 38

48 P 12 bbbbbbbbb 14 __JOIN_TABLE__ 1 x5 LEFT

49 P 12 bbbbbbbbb 14 __JOIN_TABLE__ 1 x7 JOIN

50 P 12 bbbbbbbbb 15 __TABLE_NAME__ 1 源表 O_CWWS_BUSINESS_CONTRACT

51 P 12 bbbbbbbbb 15 __TABLE_NAME__ 1 源表别名 BC

52 P 15 __TABLE_NAME__ 50

53 P 13 bbbbbbbbbbbbbbbb 14 __JOIN_TABLE__ 1 x22 ON

54 P 13 bbbbbbbbbbbbbbbb 14 __JOIN_TABLE__ 1 x23 1=1

55 P 14 __JOIN_TABLE__ 48

56 P 8 __SELECT__ 24

57 P 14 bbbbbbbbbbb 7 __SUB_SELECT__ 1 x3 )

58 P 14 bbbbbbbbbbb 7 __SUB_SELECT__ 1 x5 PP

59 P 7 __SUB_SELECT__ 23

60 P 15 bbbbbbbb 6 __JOIN_TABLE__ 1 x22 ON

61 P 15 bbbbbbbb 6 __JOIN_TABLE__ 1 x23 1=1

62 P 6 __JOIN_TABLE__ 21

63 P 2 __SELECT__ 5

64 P 16 bbbbbbbbb 1 __CREATE_TABLE_SELECT__ 1 x5 ;

65 P 1 __CREATE_TABLE_SELECT__ 1


### 根据 ZGLanguage 解析结果,我们便可以轻松地提取出SQL的表级血缘树信息


# 这里我们使用Python进行加工,代码如下:

data_info = [{'level':'0', 'node_type':'', 'value':'', 'children':[]}]

 

def go2OneLevel(fs, curr_level_node, one_row) :

    while one_row[0] :

        # 层级结束判断

        if not one_row[2] and one_row[4] == curr_level_node['level'] :

            return

 

        print('2222=', one_row)

        if one_row[5] not in ('__CREATE_TABLE_SELECT__', '__TABLE_NAME__', '__SUB_SELECT__') :

            one_row = fs.readline().strip().split('\t')

            continue

            

        if curr_level_node['node_type'] == '__CREATE_TABLE_SELECT__' and one_row[7] == '目标表' \

        or curr_level_node['node_type'] == '__TABLE_NAME__' and one_row[7] in ('源表','源表_2') :

            curr_level_node['value'] = one_row[8]

 

        if curr_level_node['level'] != one_row[4] :

            new_level_node = {'level':one_row[4], 'node_type':one_row[5], 'value':'', 'children':[]}

            go2OneLevel(fs, new_level_node, one_row)

            curr_level_node['children'].append(new_level_node)

 

        one_row = fs.readline().strip().split('\t')

 

    return 

 

with open('out.zgl', 'r', encoding='utf-8') as fs :

    one_row = fs.readline().strip().split('\t')

    while one_row[0] :

        print('1111=', one_row)

        

        if one_row[5] not in ('__CREATE_TABLE_SELECT__', '__TABLE_NAME__', '__SUB_SELECT__') :

            one_row = fs.readline().strip().split('\t')

            continue

        

        new_level_node = {'level':one_row[4], 'node_type':one_row[5], 'value':'', 'children':[]}

        print(new_level_node)

        go2OneLevel(fs, new_level_node, one_row)

        

        data_info[0]['children'].append(new_level_node)

        one_row = fs.readline().strip().split('\t')

        

print(data_info)


# 执行代码后,可以得到这段 SQL 表级血缘树信息:


[{'level': '0', 'node_type': '', 'value': '', 

  'children': [{

  

      'level': '1', 'node_type': '__CREATE_TABLE_SELECT__', 'value': 'TMP_O_SS_CL_1', 

  'children': [{

  

      'level': '5', 'node_type': '__TABLE_NAME__', 'value': 'O_SS_CL_LOAN_ACCT_STATIS', 

  'children': []}, {

  

  'level': '7', 'node_type': '__SUB_SELECT__', 'value': '', 

  'children': [{

  

      'level': '11', 'node_type': '__TABLE_NAME__', 'value': 'O_CWWS_ACCT_LOAN', 

  'children': []}, {

  

  'level': '13', 'node_type': '__TABLE_NAME__', 'value': 'O_CWWS_BUSINESS_DUEBILL', 

  'children': []}, {

  

  'level': '15', 'node_type': '__TABLE_NAME__', 'value': 'O_CWWS_BUSINESS_CONTRACT', 

  'children': []}]}]}]}]


简化以上表(value)的血缘树即是 :


目标表 : TMP_O_SS_CL_1

来源表 :         O_SS_CL_LOAN_ACCT_STATIS

子查询 :          __SUB_SELECT__

来源表 :                O_CWWS_ACCT_LOAN

来源表 :                O_CWWS_BUSINESS_DUEBILL

来源表 :                O_CWWS_BUSINESS_CONTRACT







2026-01-09 22:01:09 | 数据血缘测试帐号 | 技术 & 提问 | 阅读111次

回 复 :