-
Notifications
You must be signed in to change notification settings - Fork 1.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: fetch and parse full gmail message #5160
Changes from 1 commit
dc83466
c2ec260
77aec94
d4831b0
7a4446d
bc30639
5632a66
daa91ca
fb34418
40d2a6b
e492f7a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,14 +1,15 @@ | ||
import { Injectable, Logger } from '@nestjs/common'; | ||
|
||
import { AxiosResponse } from 'axios'; | ||
import { simpleParser } from 'mailparser'; | ||
import planer from 'planer'; | ||
import addressparser from 'addressparser'; | ||
|
||
import { GmailMessage } from 'src/modules/messaging/types/gmail-message'; | ||
import { MessageQuery } from 'src/modules/messaging/types/message-or-thread-query'; | ||
import { GmailMessageParsedResponse } from 'src/modules/messaging/types/gmail-message-parsed-response'; | ||
import { FetchByBatchesService } from 'src/modules/messaging/services/fetch-by-batch/fetch-by-batch.service'; | ||
import { formatAddressObjectAsParticipants } from 'src/modules/messaging/services/utils/format-address-object-as-participants.util'; | ||
import { assert } from 'src/utils/assert'; | ||
|
||
@Injectable() | ||
export class FetchMessagesByBatchesService { | ||
|
@@ -73,28 +74,25 @@ export class FetchMessagesByBatchesService { | |
return; | ||
} | ||
|
||
const { historyId, id, threadId, internalDate, raw } = message; | ||
|
||
const body = atob(raw?.replace(/-/g, '+').replace(/_/g, '/')); | ||
|
||
try { | ||
const parsed = await simpleParser(body, { | ||
skipHtmlToText: true, | ||
skipImageLinks: true, | ||
skipTextToHtml: true, | ||
maxHtmlLengthToParse: 0, | ||
}); | ||
|
||
const { subject, messageId, from, to, cc, bcc, text, attachments } = | ||
parsed; | ||
const { | ||
historyId, | ||
id, | ||
threadId, | ||
internalDate, | ||
subject, | ||
from, | ||
to, | ||
headerMessageId, | ||
text, | ||
attachments, | ||
} = this.parseGmailMessage(message); | ||
|
||
if (!from) throw new Error('From value is missing'); | ||
|
||
const participants = [ | ||
...formatAddressObjectAsParticipants(from, 'from'), | ||
...formatAddressObjectAsParticipants(to, 'to'), | ||
...formatAddressObjectAsParticipants(cc, 'cc'), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @rostaklein why are we removing these participants? |
||
...formatAddressObjectAsParticipants(bcc, 'bcc'), | ||
]; | ||
|
||
let textWithoutReplyQuotations = text; | ||
|
@@ -115,12 +113,12 @@ export class FetchMessagesByBatchesService { | |
const messageFromGmail: GmailMessage = { | ||
historyId, | ||
externalId: id, | ||
headerMessageId: messageId || '', | ||
headerMessageId, | ||
subject: subject || '', | ||
messageThreadExternalId: threadId, | ||
internalDate, | ||
fromHandle: from.value[0].address || '', | ||
fromDisplayName: from.value[0].name || '', | ||
fromHandle: from[0].address || '', | ||
fromDisplayName: from[0].name || '', | ||
participants, | ||
text: sanitizeString(textWithoutReplyQuotations || ''), | ||
attachments, | ||
|
@@ -157,4 +155,62 @@ export class FetchMessagesByBatchesService { | |
|
||
return { messages, errors }; | ||
} | ||
|
||
/**
 * Flattens a Gmail API message into the fields the import pipeline consumes.
 *
 * The `Subject`, `From`, `To` and `Message-ID` headers are read first (each lookup
 * throws when its header is absent), then the identifying fields are asserted,
 * and finally the plain-text body is decoded from base64.
 *
 * Only `from` and `to` addresses are extracted here; `attachments` is always
 * returned as an empty array.
 *
 * @param message - Message as returned by the Gmail API.
 * @returns The ids, header values, parsed `from`/`to` address lists and decoded text.
 */
private parseGmailMessage(message: GmailMessageParsedResponse) {
  // Header lookups stay first and in this order so the first missing header
  // is the one reported.
  const subject = this.getPropertyFromHeaders(message, 'Subject');
  const fromHeader = this.getPropertyFromHeaders(message, 'From');
  const toHeader = this.getPropertyFromHeaders(message, 'To');
  const headerMessageId = this.getPropertyFromHeaders(message, 'Message-ID');

  const { id, threadId, historyId, internalDate } = message;

  // The Gmail schema types these as optional; check them before use.
  assert(id);
  assert(threadId);
  assert(historyId);
  assert(internalDate);

  const encodedBody = this.getBodyData(message);
  let text = '';

  if (encodedBody) {
    text = Buffer.from(encodedBody, 'base64').toString();
  }

  const from = addressparser(fromHeader);
  const to = addressparser(toHeader);

  return {
    id,
    headerMessageId,
    threadId,
    historyId,
    internalDate,
    subject,
    from,
    to,
    text,
    attachments: [],
  };
}
|
||
/**
 * Locates the base64-encoded `text/plain` body of a Gmail message.
 *
 * Lookup order:
 * 1. the payload itself, when it has no child parts and is `text/plain`
 *    (single-part messages carry their body directly on the payload);
 * 2. the first child part, when it is `text/plain`;
 * 3. the first `text/plain` part nested inside the first child part.
 *
 * @param message - Message as returned by the Gmail API.
 * @returns The encoded body data, or `undefined`/`null` when no plain-text part is found.
 */
private getBodyData(message: GmailMessageParsedResponse) {
  const payload = message.payload;

  // Single-part message: there is nothing in `parts`, the body lives on the payload.
  if (!payload?.parts?.length && payload?.mimeType === 'text/plain') {
    return payload.body?.data;
  }

  const firstPart = payload?.parts?.[0];

  if (firstPart?.mimeType === 'text/plain') {
    return firstPart.body?.data;
  }

  // e.g. multipart/alternative wrapped inside multipart/mixed.
  return firstPart?.parts?.find((part) => part.mimeType === 'text/plain')
    ?.body?.data;
}
|
||
/**
 * Returns the value of a header from the Gmail message payload.
 *
 * Mail header field names are case-insensitive and senders differ in how they
 * capitalise them (e.g. `Message-ID` vs `Message-Id`), so names are compared
 * ignoring case.
 *
 * @param message - Message whose `payload.headers` are searched.
 * @param property - Header name to look up, in any letter case.
 * @returns The value of the first header whose name matches.
 * @throws Error when no header matches or the matching header has no value.
 */
private getPropertyFromHeaders(
  message: GmailMessageParsedResponse,
  property: string,
) {
  const normalizedProperty = property.toLowerCase();

  const value = message.payload?.headers?.find(
    (header) => header.name?.toLowerCase() === normalizedProperty,
  )?.value;

  if (value === undefined || value === null) {
    throw new Error(`Cannot find property "${property}" in message headers`);
  }

  return value;
}
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,6 @@ | ||
export type GmailMessageParsedResponse = { | ||
id: string; | ||
threadId: string; | ||
labelIds: string[]; | ||
snippet: string; | ||
sizeEstimate: number; | ||
raw: string; | ||
historyId: string; | ||
internalDate: string; | ||
import { gmail_v1 } from 'googleapis'; | ||
|
||
export type GmailMessageParsedResponse = gmail_v1.Schema$Message & { | ||
error?: { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not a big fan of having errors mixed with the message. Also, it seems that the new type is having all fields as optionals. Can we infer a type from gmail_v1.Schema$Message with all fields we need required? Do we need to have errors here or can we add it as an additional parameter of functions when needed? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not sure, it was there before, I assume perhaps because of the way how There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. aren't you adding the errors to this type? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I tried to have a look at this once again and even though im still not completely able to understand what We can of course make the type nicer by making it a literal like
I mean, this is the best type we can get, or is it not? It comes straight from Google, which clearly says some properties might be optional. When testing, the ones we need were always defined, but isn't it better to be safe than sorry? I'm doing a runtime assertion using:
This would throw early (the message wouldn't be parsed) if some of them are actually missing. That is actually a safer approach than letting it throw further down the line with "can't read property X of undefined" or saving invalid data into the DB. Let me know what you think about these two topics :) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I prefer the assert pattern that you are using. I've just removed the errors from your type and it looks good. |
||
code: number; | ||
message: string; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this seems a bit overkill, let's remove it :)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, I didn't mean to commit that, but I needed a way to debug the email fetching. I was hoping that the worker would do that, but it didn't. Any tips on how to do that? :)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
the worker should do it actually, if you start the worker with the debugger and place a breakpoint it should respect it
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice! It finally works! :) The missing piece for me was actually uncommenting the
MESSAGE_QUEUE_TYPE
env variable so it's not doing the sync type of queue, which always landed in one process so the worker didn't pick it up :D I'm still learning a lot about this project, and there are always some parts which you guys take for granted and which surprise me :) There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Plus, I finally got a person created out of the conversation as well 🎉 I had to figure out all these conditions that were made with good intentions 😄 (sending a Gmail-to-Gmail message didn't do what I wanted it to do)