Parcourir la source

complete extractor version 0.1.0

chenhaiyang il y a 4 ans
Parent
commit
c13c1fb224

+ 5 - 0
.gitignore

@@ -58,3 +58,8 @@ docs/_build/
 # PyBuilder
 target/
 
+# VSCode settings
+.vscode/
+
+# PyCharm configuration files
+.idea/

+ 1 - 0
MANIFEST.in

@@ -0,0 +1 @@
+include kwextraction/*.json

+ 0 - 0
kwextraction/__init__.py


Fichier diff supprimé car celui-ci est trop grand
+ 1 - 0
kwextraction/equipment_info.json


+ 24 - 0
kwextraction/extraction.py

@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+
+import json
+import pkgutil
+
+# Load the bundled equipment table once, at import time. pkgutil.get_data
+# returns the raw bytes of the resource shipped inside this package.
+_file = pkgutil.get_data(__package__, 'equipment_info.json')
+# extractor() iterates this as {equipment name: iterable of parameter names}.
+_equipment_data = json.loads(_file)
+
+
+def extractor(text_list):
+    for text in text_list:
+        result = dict()
+        result.update({'str': text})
+        for (name, params) in _equipment_data.items():
+            if name in text:
+                result.update({'equipment': name})
+                for param in params:
+                    if param in text:
+                        result.update({'param': param})
+
+        if 'param' not in result.keys():
+            result.update({'param': ''})
+
+        yield result

+ 23 - 0
kwextraction/get_equipment_info.py

@@ -0,0 +1,23 @@
+# -*- coding: utf-8 -*-
+
+import json
+
+import requests
+
+
+def query_equipment_info():
+    url = 'http://api.sagacloud.cn/pythonserver/equipment_parameter_access/get_equipname_paramname/'
+    response = requests.get(url, timeout=30)
+
+    return response.json()
+
+
+def write_info(info):
+    with open('equipment_info.json', 'wt') as f:
+        json.dump(info.get('equipment_and_parameter'), f, ensure_ascii=False)
+
+    print('Wrote to json file')
+
+
+# Run as a script: download the listing and write it to equipment_info.json.
+if __name__ == '__main__':
+    write_info(query_equipment_info())

+ 18 - 0
setup.py

@@ -0,0 +1,18 @@
+# -*- coding: utf-8 -*-
+
+import pathlib
+from setuptools import setup
+
+HERE = pathlib.Path(__file__).parent
+
+README = (HERE / 'README.md').read_text()
+
+setup(
+    name='kwextraction',
+    version='0.1.0',
+    author='Chen Haiyang',
+    license='MIT',
+    packages=['kwextraction'],
+    include_package_data=True,
+    install_requires=['requests']
+)

+ 0 - 0
tests/__init__.py


+ 17 - 0
tests/test_extraction.py

@@ -0,0 +1,17 @@
+# -*- coding: utf-8 -*-
+
+import json
+from kwextraction.extraction import extractor
+
+
+def get_key_words(text_list):
+    """Return the result of calling ``extractor`` on ``text_list``."""
+    return extractor(text_list)
+
+
+# Manual smoke run: print each extraction result as pretty-printed JSON.
+if __name__ == '__main__':
+    test_l = [
+        '1#1-3号生活热水水泵的耗电功率是多少@=-',
+        '1#1-3号生活热水水泵的出水温度d是多少@=-',
+    ]
+    for item in get_key_words(test_l):
+        # ensure_ascii=False keeps non-ASCII text readable instead of \u escapes.
+        print(json.dumps(item, sort_keys=True, indent=4, ensure_ascii=False))