AutoDebug Python is an open-source tool that leverages the power of GPT-4 to automatically debug and fix Python scripts.<p>Just enter your API key and the URL of your .py file and you’re ready to go.<p>I would love to get your feedback, as I can’t code and built this with the help of GPT-4.<p>Thanks, everyone! :)
#%%<p>"""imports"""<p>"""Load HTML from files, clean up, split, and ingest into a FAISS vector store."""
import pickle
import sys<p>from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores.faiss import FAISS
from langchain.document_loaders.html import UnstructuredHTMLLoader<p>"""end of imports"""
# %%
def ingest_current_page(input_file):
    """Load an HTML file, split it into chunks, embed them and pickle the index.

    The page is loaded with ``UnstructuredHTMLLoader``, split into
    100-character chunks with no overlap, embedded with ``OpenAIEmbeddings``
    into a FAISS vector store, and the store is pickled to
    ``vectorstore.pkl`` in the current working directory.

    Args:
        input_file: Path to the HTML file to ingest.

    Returns:
        The FAISS vector store built from the chunked page.

    Exits the process with status 1 (after printing an error message) when
    the input file cannot be found, the loader returns no documents, a
    ``TypeError`` occurs while validating or splitting the documents, or
    vectorization fails.
    """
    from pathlib import Path

    # Only the loading step is guarded for a missing file, so that a
    # FileNotFoundError raised by a later step is not misreported as the
    # input file being absent.
    try:
        doc_path = Path(input_file).absolute()
        loader = UnstructuredHTMLLoader(doc_path)
        raw_page = loader.load()
    except FileNotFoundError:
        print(f"Error: Could not find file '{input_file}'")
        sys.exit(1)

    # Guard against an empty result before indexing raw_page[0] below.
    if not raw_page:
        print(f"Error: No documents could be loaded from '{input_file}'")
        sys.exit(1)

    print(f'You have {len(raw_page)} document from the current job application page HTML')
    print(f'There are {len(raw_page[0].page_content)} characters in your document HTML')

    # Text splitting
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=100,
        chunk_overlap=0,
    )
    try:
        if not all(isinstance(doc.page_content, str) for doc in raw_page):
            raise TypeError("Error: Input data must be a list of strings")
        # Split once; the chunk list is reused for both reporting and
        # vectorization instead of splitting the same input twice.
        documents = text_splitter.split_documents(raw_page)
    except TypeError as e:
        print(e)
        sys.exit(1)
    print('Splitting current page HTML into chunks')
    print(f'Now you have {len(documents)} HTML chunk documents for current page.')

    # Vectorization
    embeddings = OpenAIEmbeddings()
    try:
        vectorstore = FAISS.from_documents(documents, embeddings)
    except Exception as e:
        print(f"Error: Failed to vectorize documents. {e}")
        sys.exit(1)

    # Saving vectorstore file
    print('Saving current job application page HTML chunk documents to the vectorstore.pkl file')
    with open("vectorstore.pkl", "wb") as f:
        pickle.dump(vectorstore, f)
    print("HTML chunk documents have been saved to 'vectorstore.pkl'")
    return vectorstore
# %%
# Code execution
if __name__ == "__main__":
    # ingest_current_page() requires the path of the HTML file; calling it
    # with no argument raises TypeError. Take the path from the command line
    # when one is given.
    # NOTE(review): the fallback name comes from the "current_page.html"
    # comment inside ingest_current_page -- confirm it is the intended default.
    input_path = sys.argv[1] if len(sys.argv) > 1 else "current_page.html"
    ingest_current_page(input_path)