Skip to content

Commit

Permalink
Back out some of the changes that were added to localdocs to support …
Browse files Browse the repository at this point in the history
…sending

mixpanel events. These changes added complexity to the Database code, which
we're looking to simplify rather than make more complex, and all the info that
we care about for mixpanel should be available from the GUI thread.

NOTE: This change will not compile by itself. It must be included with the subsequent
changes made in PR #2302, but is broken out here into a standalone commit
to aid in review.

Signed-off-by: Adam Treat <treat.adam@gmail.com>
  • Loading branch information
manyoso committed May 16, 2024
1 parent a92d266 commit c2c0238
Show file tree
Hide file tree
Showing 5 changed files with 12 additions and 105 deletions.
95 changes: 7 additions & 88 deletions gpt4all-chat/database.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -562,10 +562,7 @@ void Database::scheduleNext(int folder_id, size_t countForFolder)
updateFolderStatus(folder_id, FolderStatus::Complete);
emit updateInstalled(folder_id, true);
}
if (m_docsToScan.isEmpty()) {
m_scanTimer->stop();
updateIndexingStatus();
}
}

void Database::handleDocumentError(const QString &errorMessage,
Expand Down Expand Up @@ -756,7 +753,6 @@ void Database::scanQueue()
{
if (m_docsToScan.isEmpty()) {
m_scanTimer->stop();
updateIndexingStatus();
return;
}

Expand Down Expand Up @@ -825,8 +821,6 @@ void Database::scanQueue()
QSqlDatabase::database().transaction();
Q_ASSERT(document_id != -1);
if (info.isPdf()) {
updateFolderStatus(folder_id, FolderStatus::Embedding, -1, info.currentPage == 0);

QPdfDocument doc;
if (QPdfDocument::Error::None != doc.load(info.doc.canonicalFilePath())) {
handleDocumentError("ERROR: Could not load pdf",
Expand Down Expand Up @@ -859,8 +853,6 @@ void Database::scanQueue()
emit subtractCurrentBytesToIndex(info.folder, bytes - (bytesPerPage * doc.pageCount()));
}
} else {
updateFolderStatus(folder_id, FolderStatus::Embedding, -1, info.currentPosition == 0);

QFile file(document_path);
if (!file.open(QIODevice::ReadOnly)) {
handleDocumentError("ERROR: Cannot open file for scanning",
Expand Down Expand Up @@ -895,7 +887,7 @@ void Database::scanQueue()
return scheduleNext(folder_id, countForFolder);
}

void Database::scanDocuments(int folder_id, const QString &folder_path, bool isNew)
void Database::scanDocuments(int folder_id, const QString &folder_path)
{
#if defined(DEBUG)
qDebug() << "scanning folder for documents" << folder_path;
Expand Down Expand Up @@ -926,7 +918,6 @@ void Database::scanDocuments(int folder_id, const QString &folder_path, bool isN
}

if (!infos.isEmpty()) {
updateFolderStatus(folder_id, FolderStatus::Started, infos.count(), false, isNew);
enqueueDocuments(folder_id, infos);
}
}
Expand All @@ -948,11 +939,9 @@ void Database::start()
if (m_embeddings->fileExists() && !m_embeddings->load())
qWarning() << "ERROR: Could not load embeddings";

int nAdded = addCurrentFolders();
Network::globalInstance()->trackEvent("localdocs_startup", { {"doc_collections_total", nAdded} });
}

int Database::addCurrentFolders()
void Database::addCurrentFolders()
{
#if defined(DEBUG)
qDebug() << "addCurrentFolders";
Expand All @@ -962,26 +951,21 @@ int Database::addCurrentFolders()
QList<CollectionItem> collections;
if (!selectAllFromCollections(q, &collections)) {
qWarning() << "ERROR: Cannot select collections" << q.lastError();
return 0;
return;
}

emit collectionListUpdated(collections);

int nAdded = 0;
for (const auto &i : collections)
nAdded += addFolder(i.collection, i.folder_path, true);

updateIndexingStatus();

return collections.count() + nAdded;
}

bool Database::addFolder(const QString &collection, const QString &path, bool fromDb)
void Database::addForcedCollection(const CollectionItem &collection)
{
QFileInfo info(path);
if (!info.exists() || !info.isReadable()) {
qWarning() << "ERROR: Cannot add folder that doesn't exist or not readable" << path;
return false;
return;
}

QSqlQuery q;
Expand All @@ -990,13 +974,12 @@ bool Database::addFolder(const QString &collection, const QString &path, bool fr
// See if the folder exists in the db
if (!selectFolder(q, path, &folder_id)) {
qWarning() << "ERROR: Cannot select folder from path" << path << q.lastError();
return false;
return;
}

// Add the folder
if (folder_id == -1 && !addFolderToDB(q, path, &folder_id)) {
qWarning() << "ERROR: Cannot add folder to db with path" << path << q.lastError();
return false;
}

Q_ASSERT(folder_id != -1);
Expand Down Expand Up @@ -1310,69 +1293,5 @@ void Database::directoryChanged(const QString &path)
cleanDB();

// Rescan the documents associated with the folder
scanDocuments(folder_id, path, false);
updateIndexingStatus();
}

void Database::updateIndexingStatus() {
Q_ASSERT(m_scanTimer->isActive() || m_docsToScan.isEmpty());
if (!m_indexingTimer.isValid() && m_scanTimer->isActive()) {
Network::globalInstance()->trackEvent("localdocs_indexing_start");
m_indexingTimer.start();
} else if (m_indexingTimer.isValid() && !m_scanTimer->isActive()) {
qint64 durationMs = m_indexingTimer.elapsed();
Network::globalInstance()->trackEvent("localdocs_indexing_complete", { {"$duration", durationMs / 1000.} });
m_indexingTimer.invalidate();
}
}

void Database::updateFolderStatus(int folder_id, Database::FolderStatus status, int numDocs, bool atStart, bool isNew) {
FolderStatusRecord *lastRecord = nullptr;
if (m_foldersBeingIndexed.contains(folder_id)) {
lastRecord = &m_foldersBeingIndexed[folder_id];
}
Q_ASSERT(lastRecord || status == FolderStatus::Started);

switch (status) {
case FolderStatus::Started:
if (lastRecord == nullptr) {
// record timestamp but don't send an event yet
m_foldersBeingIndexed.insert(folder_id, { QDateTime::currentMSecsSinceEpoch(), isNew, numDocs });
emit updateIndexing(folder_id, true);
}
break;
case FolderStatus::Embedding:
if (!lastRecord->docsChanged) {
Q_ASSERT(atStart);
// send start event with the original timestamp for folders that need updating
const auto *embeddingModels = ModelList::globalInstance()->installedEmbeddingModels();
Network::globalInstance()->trackEvent("localdocs_folder_indexing", {
{"folder_id", folder_id},
{"is_new_collection", lastRecord->isNew},
{"document_count", lastRecord->numDocs},
{"embedding_model", embeddingModels->defaultModelInfo().filename()},
{"chunk_size", m_chunkSize},
{"time", lastRecord->startTime},
});
}
lastRecord->docsChanged += atStart;
lastRecord->chunksRead++;
break;
case FolderStatus::Complete:
if (lastRecord->docsChanged) {
// send complete event for folders that were updated
qint64 durationMs = QDateTime::currentMSecsSinceEpoch() - lastRecord->startTime;
Network::globalInstance()->trackEvent("localdocs_folder_complete", {
{"folder_id", folder_id},
{"is_new_collection", lastRecord->isNew},
{"documents_total", lastRecord->numDocs},
{"documents_changed", lastRecord->docsChanged},
{"chunks_read", lastRecord->chunksRead},
{"$duration", durationMs / 1000.},
});
}
m_foldersBeingIndexed.remove(folder_id);
emit updateIndexing(folder_id, false);
break;
}
scanDocuments(folder_id, path);
}
12 changes: 3 additions & 9 deletions gpt4all-chat/database.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ class Database : public QObject
public Q_SLOTS:
void start();
void scanQueue();
void scanDocuments(int folder_id, const QString &folder_path, bool isNew);
bool addFolder(const QString &collection, const QString &path, bool fromDb);
void scanDocuments(int folder_id, const QString &folder_path);
void addFolder(const QString &collection, const QString &path);
void removeFolder(const QString &collection, const QString &path);
void retrieveFromDB(const QList<QString> &collections, const QString &text, int retrievalSize, QList<ResultInfo> *results);
void cleanDB();
Expand All @@ -90,13 +90,11 @@ private Q_SLOTS:
void directoryChanged(const QString &path);
bool addFolderToWatch(const QString &path);
bool removeFolderFromWatch(const QString &path);
int addCurrentFolders();
void addCurrentFolders();
void handleEmbeddingsGenerated(const QVector<EmbeddingResult> &embeddings);
void handleErrorGenerated(int folder_id, const QString &error);

private:
enum class FolderStatus { Started, Embedding, Complete };
struct FolderStatusRecord { qint64 startTime; bool isNew; int numDocs, docsChanged, chunksRead; };

void removeFolderInternal(const QString &collection, int folder_id, const QString &path);
size_t chunkStream(QTextStream &stream, int folder_id, int document_id, const QString &file,
Expand All @@ -112,15 +110,11 @@ private Q_SLOTS:
void removeFolderFromDocumentQueue(int folder_id);
void enqueueDocumentInternal(const DocumentInfo &info, bool prepend = false);
void enqueueDocuments(int folder_id, const QVector<DocumentInfo> &infos);
void updateIndexingStatus();
void updateFolderStatus(int folder_id, FolderStatus status, int numDocs = -1, bool atStart = false, bool isNew = false);

private:
int m_chunkSize;
QTimer *m_scanTimer;
QMap<int, QQueue<DocumentInfo>> m_docsToScan;
QElapsedTimer m_indexingTimer;
QMap<int, FolderStatusRecord> m_foldersBeingIndexed;
QList<ResultInfo> m_retrieve;
QThread m_dbThread;
QFileSystemWatcher *m_watcher;
Expand Down
2 changes: 1 addition & 1 deletion gpt4all-chat/localdocs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ void LocalDocs::addFolder(const QString &collection, const QString &path)
{
const QUrl url(path);
const QString localPath = url.isLocalFile() ? url.toLocalFile() : path;
emit requestAddFolder(collection, localPath, false);
emit requestAddFolder(collection, localPath);
}

void LocalDocs::removeFolder(const QString &collection, const QString &path)
Expand Down
2 changes: 1 addition & 1 deletion gpt4all-chat/localdocs.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ public Q_SLOTS:

Q_SIGNALS:
void requestStart();
void requestAddFolder(const QString &collection, const QString &path, bool fromDb);
void requestAddFolder(const QString &collection, const QString &path);
void requestRemoveFolder(const QString &collection, const QString &path);
void requestChunkSizeChange(int chunkSize);
void localDocsModelChanged();
Expand Down
6 changes: 0 additions & 6 deletions gpt4all-chat/localdocsmodel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -164,12 +164,6 @@ void LocalDocsModel::addCollectionItem(const CollectionItem &item, bool fromDb)
beginInsertRows(QModelIndex(), m_collectionList.size(), m_collectionList.size());
m_collectionList.append(item);
endInsertRows();

if (!fromDb) {
Network::globalInstance()->trackEvent("doc_collection_add", {
{"collection_count", m_collectionList.count()},
});
}
}

void LocalDocsModel::removeCollectionIf(std::function<bool(CollectionItem)> const &predicate) {
Expand Down

0 comments on commit c2c0238

Please sign in to comment.