Spaces:
Runtime error
Runtime error
| #!/usr/bin/env python | |
| # coding: utf-8 | |
| # In[52]: | |
| # !pip install -q pymilvus towhee gradio | |
| # In[53]: | |
| #!curl -L https://raw.githubusercontent.com/pankajkishore/Cognitive-Project/master/latest_ticket_data.csv -O | |
| # In[1]: | |
| import pandas as pd | |
# Load the ticket dump; later cells rely on its 'id', 'description' and
# 'category' columns.
df = pd.read_csv('latest_ticket_data.csv')
df.head()

# In[2]:
df.shape


# In[3]:
def _description_length(text):
    """Return a rough size for one description cell, or None for non-strings.

    The size is the smaller of the number of space-separated pieces and the
    number of characters.
    """
    if not isinstance(text, str):
        return None
    pieces = text.split(" ")
    return min(len(pieces), len(text))


df['length'] = df['description'].apply(_description_length)
df['length'].max()

# In[4]:
df.description[14]

# In[5]:
df.shape

# In[6]:
# Both lookup tables are keyed by the 'id' column.
tickets_by_id = df.set_index('id')
id_category = tickets_by_id['category'].to_dict()

# In[7]:
id_description = tickets_by_id['description'].to_dict()

# In[8]:
id_description[12]

# In[9]:
id_category[10]
| # In[11]: | |
| from milvus import default_server | |
| from pymilvus import connections, utility | |
# Start the default Milvus server object imported from the `milvus` package.
default_server.start()

# In[12]:
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility

# In[24]:
# Milvus parameters
milvus_endpoint = {'host': '127.0.0.1', 'port': '19530'}
connections.connect(**milvus_endpoint)

# In[25]:
# Bare expression kept from the notebook: shows the server's listen port.
default_server.listen_port
| # In[17]: | |
def create_milvus_collection(collection_name, dim):
    """(Re)create a Milvus collection for embedding search and index it.

    Any existing collection with the same name is dropped first, so calling
    this discards whatever was stored under that name.

    Args:
        collection_name: Name of the collection to create.
        dim: Dimensionality of the vectors stored in the ``embedding`` field.

    Returns:
        The newly created ``Collection``, with an IVF_FLAT index (L2 metric)
        on its ``embedding`` field.
    """
    connections.connect(host='127.0.0.1', port='19530')

    # Start from a clean slate so schema changes always take effect.
    if utility.has_collection(collection_name):
        utility.drop_collection(collection_name)

    fields = [
        # The keyword is `description`; the former `descrition` spelling was
        # not the schema's description argument, so no description was set.
        FieldSchema(name='id', dtype=DataType.VARCHAR, description='ids',
                    max_length=500, is_primary=True, auto_id=False),
        FieldSchema(name='embedding', dtype=DataType.FLOAT_VECTOR,
                    description='embedding vectors', dim=dim),
    ]
    schema = CollectionSchema(fields=fields, description='reverse text search')
    collection = Collection(name=collection_name, schema=schema)

    # create IVF_FLAT index for collection.
    index_params = {
        'metric_type': 'L2',
        'index_type': "IVF_FLAT",
        'params': {"nlist": 2048},
    }
    collection.create_index(field_name="embedding", index_params=index_params)
    return collection
| # In[18]: | |
# Build the collection that the insert and search pipelines refer to by name.
# NOTE(review): 768 is presumably the DPR embedding size — confirm.
collection = create_milvus_collection(collection_name='latest_ticket_data', dim=768)

# In[19]:
collection.load()
| # In[26]: | |
| from towhee import pipe, ops | |
| import numpy as np | |
| from towhee.datacollection import DataCollection | |
# The DPR encoder is BERT-based and has only 512 position embeddings; a longer
# token sequence fails with "The size of tensor a (...) must match the size of
# tensor b (512)".  Whitespace-separated words are only a rough proxy for
# tokens, so the cap is kept well below 512.
MAX_DESCRIPTION_WORDS = 256


def _clip_description(text):
    """Return *text* limited to its first MAX_DESCRIPTION_WORDS words.

    Text that is already short enough is returned unchanged, so its embedding
    is unaffected.
    """
    words = text.split()
    if len(words) <= MAX_DESCRIPTION_WORDS:
        return text
    return ' '.join(words[:MAX_DESCRIPTION_WORDS])


# Ingest pipeline: (id, description, category) -> embed the description,
# scale the vector by its norm, then insert (id, vector) into Milvus.
insert_pipe = (
    pipe.input('id', 'description', 'category')
    # Guard against over-long descriptions before they reach the encoder.
    .map('description', 'description', _clip_description)
    .map('description', 'vec', ops.text_embedding.dpr(model_name='facebook/dpr-ctx_encoder-single-nq-base'))
    .map('vec', 'vec', lambda x: x / np.linalg.norm(x, axis=0))
    .map(('id', 'vec'), 'insert_status', ops.ann_insert.milvus_client(host='127.0.0.1',
                                                                      port='19530',
                                                                      collection_name='latest_ticket_data'))
    .output()
)
| # In[ ]: | |
| # File "/Users/www.abcom.in/Documents/milvus/.milvusenv/lib/python3.11/site-packages/transformers/models/bert/modeling_bert.py", line 238, in forward | |
| # embeddings += position_embeddings | |
| # RuntimeError: The size of tensor a (1002) must match the size of tensor b (512) at non-singleton dimension 1 | |
| # In[27]: | |
| import csv | |
# Feed every CSV row through the insert pipeline.
# newline='' lets the csv module do its own newline handling, which keeps
# line breaks embedded in quoted fields intact.
with open('latest_ticket_data.csv', encoding='utf-8', newline='') as f:
    reader = csv.reader(f)
    # Skip the header row; the default avoids StopIteration on an empty file.
    next(reader, None)
    for row in reader:
        if not row:
            # csv.reader yields an empty list for a blank line; there is
            # nothing to insert for it.
            continue
        insert_pipe(*row)
| # In[28]: | |
collection.load()

# In[29]:
# Report how many entities the collection holds after ingestion.
entity_count = collection.num_entities
print('Total number of inserted data is {}.'.format(entity_count))
| # In[30]: | |
def _to_unit_length(vec):
    """Divide *vec* by its norm taken along axis 0."""
    return vec / np.linalg.norm(vec, axis=0)


def _categories_for_hits(hits):
    """Map each search hit to a ticket category via the id in its first slot."""
    return [id_category[int(hit[0])] for hit in hits]


_query_encoder = ops.text_embedding.dpr(model_name="facebook/dpr-ctx_encoder-single-nq-base")
_nearest_ticket = ops.ann_search.milvus_client(
    host='127.0.0.1',
    port='19530',
    collection_name='latest_ticket_data',
    limit=1,
)

# Query pipeline: description -> embedding -> closest stored ticket -> category.
ans_pipe = (
    pipe.input('description')
    .map('description', 'vec', _query_encoder)
    .map('vec', 'vec', _to_unit_length)
    .map('vec', 'res', _nearest_ticket)
    .map('res', 'category', _categories_for_hits)
    .output('description', 'category')
)
| # In[31]: | |
# Smoke-test the query pipeline with one sample ticket description.
sample_query = 'report hi please attached report user take appropriate actions order agent her computer'

# In[32]:
# Wrap the pipeline result in a DataCollection so it can be displayed.
ans = DataCollection(ans_pipe(sample_query))
ans.show()
| # In[33]: | |
| import towhee | |
def chat(message, history):
    """Answer one chat message with the category of the closest stored ticket.

    Args:
        message: Ticket description typed by the user.
        history: List of ``(message, response)`` tuples from earlier turns,
            or a falsy value on the first turn.

    Returns:
        The updated history twice: the function has two outputs, the value
        to display and the value to keep as state.
    """
    history = history or []

    # Reuse the module-level `ans_pipe` rather than constructing an identical
    # pipeline (encoder + Milvus client) for every incoming message.
    result = ans_pipe(message).get()

    # The pipeline outputs ('description', 'category'); index 1 is the list
    # of categories for the returned hits.
    categories = result[1]
    # An empty hit list would otherwise raise IndexError.
    response = categories[0] if categories else 'No matching category found.'

    history.append((message, response))
    return history, history
| # In[34]: | |
| import gradio | |
# Make sure the collection is loaded before serving queries.
collection.load()

# NOTE(review): `color_map` and `allow_screenshot` may not be accepted by
# newer Gradio releases — confirm against the installed version.
chatbot = gradio.Chatbot(color_map=("green", "gray"))

# Inputs are the typed text plus session state; outputs are the chatbot
# widget plus the updated state, matching chat()'s two return values.
interface = gradio.Interface(
    fn=chat,
    inputs=["text", "state"],
    outputs=[chatbot, "state"],
    allow_screenshot=False,
    allow_flagging="never",
)
interface.launch(inline=True, share=True)
| # In[ ]: | |
| # In[ ]: | |
| # In[ ]: | |
| # In[ ]: | |