diff --git a/pkg/message/service/message_service.go b/pkg/message/service/message_service.go index b2b21c72..4488714d 100644 --- a/pkg/message/service/message_service.go +++ b/pkg/message/service/message_service.go @@ -52,6 +52,10 @@ type ChatPresenceStruct struct { Number string `json:"number"` State string `json:"state"` IsAudio bool `json:"isAudio"` + // Delay, in milliseconds, keeps the "composing"/"recording" indicator alive + // for the given duration (re-sending it periodically) and then sends "paused". + // Only applies when State is "composing". 0 = single fire (legacy behaviour). + Delay int `json:"delay"` } type MarkReadStruct struct { @@ -137,6 +141,13 @@ func (m *messageService) React(data *ReactStruct, instance *instance_model.Insta return nil, errors.New("invalid phone number") } + // Strip the "+" that ParseJID/CreateJID adds. The recipient is used both as + // the SendMessage target (usync/device resolution) AND as the MessageKey + // RemoteJID that references the reacted message's chat. A malformed "+JID" + // breaks device resolution (usync) and prevents the reaction from matching + // the original message's chat. See utils.CanonicalJID. + recipient = utils.CanonicalJID(recipient) + if data.Id == "" { m.loggerWrapper.GetLogger(instance.Id).LogError("[%s] Missing Id in Payload", instance.Id) return nil, errors.New("missing id in payload") @@ -150,7 +161,8 @@ func (m *messageService) React(data *ReactStruct, instance *instance_model.Insta reaction = "" } - // Create MessageKey + // Create MessageKey — msgId is the ID of the message being reacted to, + // NOT the ID of the reaction envelope itself. messageKey := &waCommon.MessageKey{ RemoteJID: proto.String(recipient.String()), FromMe: proto.Bool(fromMe), @@ -161,22 +173,23 @@ func (m *messageService) React(data *ReactStruct, instance *instance_model.Insta if data.Participant != "" { participantJID, ok := utils.ParseJID(data.Participant) if ok { - messageKey.Participant = proto.String(participantJID.String()) + messageKey.Participant = proto.String(utils.CanonicalJID(participantJID).String()) } } msg := &waE2E.Message{ ReactionMessage: &waE2E.ReactionMessage{ - Key: messageKey, - Text: proto.String(reaction), - // GroupingKey: proto.String(reaction), + Key: messageKey, + Text: proto.String(reaction), SenderTimestampMS: proto.Int64(time.Now().UnixMilli()), }, } - response, err := client.SendMessage(context.Background(), recipient, msg, whatsmeow.SendRequestExtra{ - ID: msgId, - }) + // Do NOT pass ID: msgId in SendRequestExtra. Doing so would reuse the + // original message ID as the reaction envelope ID; WhatsApp silently + // deduplicates it and drops the reaction. Let whatsmeow generate a + // fresh, unique ID for the envelope. + response, err := client.SendMessage(context.Background(), recipient, msg) if err != nil { return nil, err } @@ -191,7 +204,7 @@ func (m *messageService) React(data *ReactStruct, instance *instance_model.Insta IsFromMe: true, IsGroup: isGroup, }, - ID: msgId, + ID: response.ID, Timestamp: time.Now(), ServerID: response.ServerID, Type: messageType, @@ -219,18 +232,70 @@ func (m *messageService) ChatPresence(data *ChatPresenceStruct, instance *instan return "", errors.New("invalid phone number") } + // chatstate (typing) is a RAW node sent without usync normalization, so it + // needs a canonical digits-only JID or WhatsApp silently drops it. See + // utils.CanonicalJID for the full rationale. + recipient = utils.CanonicalJID(recipient) + media := "" if data.IsAudio { media = "audio" } - err = client.SendChatPresence(context.Background(), recipient, types.ChatPresence(data.State), types.ChatPresenceMedia(media)) + // WhatsApp only forwards chatstate (typing / recording) events to the + // recipient while the sender is marked online. SendChatPresence merely + // sends the chatstate node — it does NOT mark us available. Background + // presence handling (events.AppStateSyncComplete) may have set us to + // Unavailable, in which case the server silently drops the typing + // indicator. Mark ourselves available first to guarantee delivery. + if presErr := client.SendPresence(context.Background(), types.PresenceAvailable); presErr != nil { + m.loggerWrapper.GetLogger(instance.Id).LogWarn("[%s] SendPresence(available) before chatstate failed (non-fatal): %v", instance.Id, presErr) + } + + state := types.ChatPresence(data.State) + mediaType := types.ChatPresenceMedia(media) + + err = client.SendChatPresence(context.Background(), recipient, state, mediaType) if err != nil { return "", err } - m.loggerWrapper.GetLogger(instance.Id).LogInfo("Message sent to %s", data.Number) + // A single "composing" indicator is ephemeral: WhatsApp expires it after a + // few seconds unless refreshed. When a Delay is provided (and we're typing), + // keep the indicator alive for the requested duration by re-sending it, then + // send "paused" so the indicator clears cleanly instead of timing out. + if data.Delay > 0 && state == types.ChatPresenceComposing { + const keepAliveInterval = 5 * time.Second + const maxDelay = 60 * time.Second + + remaining := time.Duration(data.Delay) * time.Millisecond + if remaining > maxDelay { + remaining = maxDelay + } + + for remaining > 0 { + sleep := keepAliveInterval + if remaining < sleep { + sleep = remaining + } + time.Sleep(sleep) + remaining -= sleep + + if remaining > 0 { + // Refresh the indicator so it doesn't expire mid-delay. + if refreshErr := client.SendChatPresence(context.Background(), recipient, state, mediaType); refreshErr != nil { + m.loggerWrapper.GetLogger(instance.Id).LogWarn("[%s] Refresh chatstate failed (non-fatal): %v", instance.Id, refreshErr) + } + } + } + + if pausedErr := client.SendChatPresence(context.Background(), recipient, types.ChatPresencePaused, mediaType); pausedErr != nil { + m.loggerWrapper.GetLogger(instance.Id).LogWarn("[%s] SendChatPresence(paused) failed (non-fatal): %v", instance.Id, pausedErr) + } + } + + m.loggerWrapper.GetLogger(instance.Id).LogInfo("Presence (%s) sent to %s", data.State, data.Number) return ts.String(), nil } @@ -249,6 +314,10 @@ func (m *messageService) MarkRead(data *MarkReadStruct, instance *instance_model return "", errors.New("invalid phone number") } + // Read receipts are RAW nodes (no usync) — strip the "+" so the receipt + // reaches the recipient. Same root cause as the typing fix above. + jid = utils.CanonicalJID(jid) + err = client.MarkRead(context.Background(), data.Id, time.Now(), jid, jid) if err != nil { m.loggerWrapper.GetLogger(instance.Id).LogError("[%s] error marking message as read: %v", instance.Id, err) diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go index e7593477..7ba65bdc 100644 --- a/pkg/utils/utils.go +++ b/pkg/utils/utils.go @@ -215,6 +215,21 @@ func ParseJID(arg string) (whatsmeow_types.JID, bool) { return recipient, true } +// CanonicalJID returns a JID safe for RAW protocol nodes (chatstate / typing, +// read receipts, presence subscribe, etc.). +// +// CreateJID intentionally prefixes phone numbers with "+" (e.g. +// "+554187083284@s.whatsapp.net") to match the IsOnWhatsApp/display convention. +// Message sending tolerates this because whatsmeow normalizes the JID during +// usync/device resolution. RAW nodes are sent WITHOUT usync, so a malformed +// "+JID" reaches the server and the node is silently dropped (e.g. the "typing" +// indicator never reaches the recipient). WhatsApp user JIDs are digits-only, so +// strip the leading "+" to get the canonical form. +func CanonicalJID(jid whatsmeow_types.JID) whatsmeow_types.JID { + jid.User = strings.TrimPrefix(jid.User, "+") + return jid +} + func CreateHTTPProxy(httpHost, httpPort, user, password string) (func(*http.Request) (*url.URL, error), error) { address := fmt.Sprintf("http://%s:%s@%s:%s", user, password, httpHost, httpPort) diff --git a/pkg/utils/utils_test.go b/pkg/utils/utils_test.go index 25a99792..5ff8795f 100644 --- a/pkg/utils/utils_test.go +++ b/pkg/utils/utils_test.go @@ -1,6 +1,7 @@ package utils import ( + "strings" "testing" ) @@ -325,3 +326,61 @@ func TestParseJID(t *testing.T) { }) } } + +// TestCanonicalJID locks in the fix for the silent typing-indicator / receipt +// drop: ParseJID (via CreateJID) prefixes phone numbers with "+", which is +// tolerated by message sending (usync normalizes it) but breaks RAW nodes +// (chatstate, read receipts). CanonicalJID must strip that "+" while leaving +// already-canonical JIDs (LID, group, digits-only) untouched. +func TestCanonicalJID(t *testing.T) { + tests := []struct { + name string + input string + expectedJID string // canonical JID expected after ParseJID -> CanonicalJID + }{ + { + name: "BR phone strips the leading +", + input: "554187083284", + expectedJID: "554187083284@s.whatsapp.net", + }, + { + name: "US phone strips the leading +", + input: "15551234567", + expectedJID: "15551234567@s.whatsapp.net", + }, + { + name: "Already-canonical JID is unchanged", + input: "554187083284@s.whatsapp.net", + expectedJID: "554187083284@s.whatsapp.net", + }, + { + name: "LID JID is left untouched", + input: "15883309207561@lid", + expectedJID: "15883309207561@lid", + }, + { + name: "Group JID is left untouched", + input: "120363123456789012@g.us", + expectedJID: "120363123456789012@g.us", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + jid, ok := ParseJID(tt.input) + if !ok { + t.Fatalf("ParseJID(%q) failed", tt.input) + } + + canonical := CanonicalJID(jid) + + if got := canonical.String(); got != tt.expectedJID { + t.Errorf("CanonicalJID for input %q = %q, want %q", tt.input, got, tt.expectedJID) + } + + if strings.HasPrefix(canonical.User, "+") { + t.Errorf("CanonicalJID for input %q still has '+' in User: %q", tt.input, canonical.User) + } + }) + } +}