HotTRDealsBackend/services/dealClassification.service.js
2026-02-04 06:39:10 +00:00

128 lines
8.6 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// services/dealClassification.service.js
const OpenAI = require("openai")
// Module-level OpenAI SDK client; the API key is read from the OPENAI_API_KEY
// environment variable at the moment this module is loaded.
const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY })
// System prompt sent as the "system" message by classifyDeal. It tells the model to
// (1) flag masked/merged Turkish profanity and (2) choose one category id plus brand,
// model and lifestyle tags.
// The text below is runtime input to the model: edit it deliberately, not cosmetically.
// NOTE(review): the prompt asks for at most 3 tags with natural capitalization, while the
// response schema in classifyDeal allows 5 items and normalizeTags lowercases every tag.
// Confirm which behavior is intended.
// NOTE(review): the "Versace" example emits a model tag ("VRSCVE") that does not appear
// in the example title. Confirm this is intentional.
const SYSTEM_PROMPT = `
You are a specialized AI Moderator and Categorizer for a Turkish Deal Platform. Your goal is to analyze deal submissions for security issues and classify them accurately.
### 1. CRITICAL SECURITY: TURKISH MASKED PROFANITY DETECTION
- You must detect "hidden", "masked", or "merged" Turkish profanity.
- SCAN FOR STUCK WORDS: Check if slurs are merged with normal words (e.g., "ucuzamk", "güvenleyramıyiyip", "fırsatpiç").
- DECOMPOSE STRINGS: Carefully analyze long strings of characters to see if a slur is buried inside.
- KEYWORDS (Check inside words): "yram", "yyrk", "amk", "skm", "oç", "piç", "göt", "daly", "sikt", "yarrak", "pipi", "meme".
- PHONETIC CHECK: If a word sounds like a Turkish slur when read phonetically, it IS an issue.
- If ANY issue is found:
- Set has_issue = true
- Set issue_type = PROFANITY
- Set issue_reason = Contains hidden/merged Turkish profanity
- Set needs_review = true
### 2. CLASSIFICATION & TAGGING
- CATEGORY: Choose exactly ONE category_id that best fits the product.
### TAGGING STRATEGY (BRAND, MODEL & LIFESTYLE):
- Goal: Create a precise user interest profile using 3 distinct tags.
- Use NATURAL capitalization and spaces.
1. **Brand (Who?):** The manufacturer (e.g., "Apple", "HIQ Nutrition", "Lego").
2. **Model (What?):** Specific series/model, MAX 2-3 words (e.g., "Creatine Monohydrate", "iPhone 15 Pro", "Star Wars").
3. **Lifestyle/Interest (Vibe?):** The root interest that connects different categories (e.g., "spor", "teknoloji", "oyun", "hobi", "moda", "luks", "ev-yasam").
### RULES:
- MAX 3 tags.
- DO NOT include technical specs like "600g", "128GB", "siyah", "44mm".
- DO NOT use the exact category name (e.g., if category is "Sporcu Besini", don't use "sporcu-besini", use "spor").
- If no brand/model found, provide only the Lifestyle tag.
### EXAMPLE OUTPUTS:
- "HIQ NUTRITION HIQ Creatine %100 Monohydrate XL 600g" -> ["HIQ Nutrition", "Creatine Monohydrate", "spor"]
- "Apple iPhone 15 Pro 128GB" -> ["Apple", "iPhone 15 Pro", "teknoloji"]
- "Lego Star Wars Millennium Falcon Seti" -> ["Lego", "Star Wars", "hobi"]
- "Versace Erkek Kol Saati" -> ["Versace", "VRSCVE", "luks"]
- "Nike Air Max Ayakkabı" -> ["Nike", "Air Max", "spor"].
`
// Flat category taxonomy that classifyDeal places at the top of the user message.
// Format: "<id>:<Name>" entries separated by "|", all on ONE line.
// A quoted JavaScript string cannot contain raw line breaks, so the value is assembled
// from per-group segments and joined with "|"; the resulting string has no newlines.
const TAXONOMY_LINE = [
  // 100 – Elektronik
  "100:Elektronik|101:Tel_Aksesuar|102:Akilli_Telefon|103:Tel_Kilif|104:Sarj_Kablo|105:Powerbank|106:Ekran_Koruyucu",
  "107:Giyilebilir_Tekno|108:Akilli_Saat|109:Fitness_Bileklik",
  "110:Bilgisayar_Laptop|111:Laptop|112:Masaustu_PC|113:Tablet",
  "114:PC_Bilesen|115:Islemci_CPU|116:Ekran_Karti_GPU|117:RAM_Bellek|118:Dahili_Depolama_SSD|119:Anakart|120:Guc_Kaynagi_PSU|121:PC_Kasasi|122:Sogutma_Sistemi",
  "123:PC_Cevre_Birim|124:Monitor|125:Klavye|126:Fare_Mousepad|127:Webcam|128:PC_Hoparlor|129:PC_Mikrofon|130:USB_Hub_Dock|131:Laptop_Cantasi",
  "132:Ag_Urunleri_Modem|133:Modem_Router|134:Wifi_Genisletici|135:Ag_Switch",
  "136:Yazici_Tarayici|137:Yazici|138:Toner_Kartus|139:Tarayici",
  "140:Harici_Depolama|141:Harici_Disk|142:USB_Bellek|143:NAS_Cihazi|144:Hafiza_Karti",
  "145:Ses_Goruntu|146:Kulaklik|147:TWS_Kulaklik|148:Hoparlor_Sistemleri|149:Bluetooth_Hoparlor|150:Soundbar|151:Televizyon|152:Projeksiyon|153:Medya_Oynatici|154:TV_Aksesuar|155:Pikap_Plak",
  "156:Kamera_Foto|157:Fotograf_Makinesi|158:Aksiyon_Kamerasi|159:Kamera_Lens|160:Drone|161:Tripod_Stabilizer|162:Kamera_Aksesuar",
  "163:Akilli_Ev|164:Akilli_Aydinlatma|165:Akilli_Priz|166:Akilli_Guvenlik_Kam|167:Akilli_Sensor|168:Akilli_Termostat|169:Sesli_Asistan",
  // 200 – Ev_Yasam
  "200:Ev_Yasam|201:Mobilya|202:Oturma_Odasi|203:Yatak_Odasi|204:Yemek_Odasi|205:Calisma_Odasi|206:Depolama_Duzenleme",
  "207:Ev_Dekorasyon|208:Hali_Kilim|209:Duvar_Dekor|210:Dekoratif_Obje|211:Mum_Oda_Kokusu",
  "212:Aydinlatma_Genel|213:Avize_Sarkit|214:Masa_Lambasi|215:Lambader|216:LED_Aydinlatma",
  "217:Mutfak_Yemek|218:Tencere_Tava|219:Yemek_Takimi|220:Catal_Bicak|221:Bardak_Kadeh|222:Mutfak_Gerecleri|223:Saklama_Kabi_Termos",
  "224:Kucuk_Ev_Aletleri|225:Kahve_Makinesi|226:Blender_Robot|227:Airfryer|228:Tost_Mak_Fritoz",
  "229:Beyaz_Esya|230:Buzdolabi|231:Camasir_Kurutma|232:Bulasik_Makinesi|233:Firin_Ocak",
  "234:Ev_Tekstili|235:Nevresim_Takimi|236:Yorgan_Battaniye|237:Perde_Jaluzi|238:Havlu_Seti|239:Yastik_Minder",
  "240:Temizlik_Bakim|241:Supurge_Robot|242:Temizlik_Malzeme|243:Deterjan_Yumusatici|244:Utu_Masasi",
  "245:Bahce_Dis_Mekan|246:Bahce_Mobilyasi|247:Mangal_Barbeku|248:Bahce_Aletleri|249:Sulama_Sistemi|250:Bitki_Bakim_Tohum",
  "251:Kendin_Yap_DIY|252:Elektronik_El_Aletleri|253:El_Aletleri|254:Hirdavat_Baglanti|255:Is_Guvenligi",
  // 300 – Giyim_Moda
  "300:Giyim_Moda",
  "301:Kadin_Giyim|302:Elbise|303:Kadin_Ust_Giyim|304:Kadin_Alt_Giyim|305:Kadin_Dis_Giyim|306:Kadin_Ic_Giyim|307:Kadin_Spor_Giyim|308:Kadin_Mayo_Bikini",
  "309:Erkek_Giyim|310:Erkek_Ust_Giyim|311:Erkek_Alt_Giyim|312:Erkek_Dis_Giyim|313:Erkek_Ic_Giyim|314:Erkek_Spor_Giyim|315:Erkek_Mayo_Sort",
  "316:Ayakkabi|317:Kadin_Ayakkabi|318:Erkek_Ayakkabi|319:Cocuk_Ayakkabi",
  "320:Canta_Bavul|321:El_Cantasi|322:Sirt_Cantasi|323:Cuzdan|324:Valiz_Bavul",
  "325:Aksesuar_Moda|326:Taki_Mucevher|327:Saat|328:Kemer|329:Sapka_Bere|330:Gunes_Gozlugu|331:Esarp_Sal|332:Eldiven",
  // 400 – Guzellik_Kisisel_Bakim
  "400:Guzellik_Kisisel_Bakim|401:Makyaj|406:Cilt_Bakimi|414:Sac_Bakimi|419:Sac_Sekillendirme_Cihaz|420:Parfum_Deodorant|425:Kisisel_Hijyen|430:Erkek_Bakim_Grooming",
  // 500 – Gida_Market
  "500:Gida_Market|501:Temel_Gida|509:Taze_Urunler|516:Atistirmalik_Sekerleme|522:Icecekler|529:Organik_Ozel_Beslenme|530:Dondurulmus_Gida|531:Bebek_Cocuk_Gida",
  // 600 – Oyun
  "600:Oyun|601:Oyun_Konsolu|602:PS_Konsol|603:Xbox_Konsol|604:Nintendo_Konsol|605:Retro_Konsol|606:Video_Oyunlari|611:Dijital_Oyun_Abonelik|612:Oyun_Aksesuar|616:VR_Cihaz|618:Oyuncu_Koltuk_Masa",
  // 700 – Otomotiv
  "700:Otomotiv|701:Oto_Yedek_Parca|707:Oto_Yag_Sivi|710:Lastik_Jant|713:Oto_Bakim_Temizlik|717:Oto_Aksesuar|719:Oto_Ses_Sistemi|723:Motosiklet_Scooter",
  // 800 – Spor_Outdoor
  "800:Spor_Outdoor|801:Fitness_Kardiyo|806:Bisiklet|808:Bisiklet_Aksesuar|810:Kamp_Outdoor|817:Su_Sporlari|821:Takim_Sporlari",
  // 900 – Bebek_Cocuk
  "900:Bebek_Cocuk|901:Bebek_Bakimi|902:Bebek_Bezi|907:Bebek_Beslenme|911:Mama_Sandalyeleri|913:Bebek_Arac_Gerec|914:Bebek_Arabasi|915:Oto_Koltugu|919:Oyuncak|921:Egitici_Oyuncak|922:Kutu_Oyunlari|923:Yapboz_Puzzle|925:Cocuk_Giyim_Ayakkabi|930:Bebek_Odasi_Mobilya",
  // 1000 – Kitap_Medya
  "1000:Kitap_Medya|1001:Kitaplar|1009:Film_Dizi|1012:Muzik_Enstruman|1016:Dergi_Gazete|1017:Cizgi_Roman_Manga",
  // 1100 – Ofis_Kirtasiye
  "1100:Ofis_Kirtasiye|1103:Defter_Ajanda|1108:Okul_Cantasi_Malzeme|1109:Sanat_Hobi_Malzeme",
  // 1200 – Hizmetler_Seyahat
  "1200:Hizmetler_Seyahat|1201:Internet_Iletisim|1206:Seyahat_Otel_Ucak|1213:Deneyim_Etkinlik|1214:Restoran_Yemek|1215:Egitim_Kurslar|1216:Ev_Hizmetleri",
  // 1300 – Saglik_Wellness
  "1300:Saglik_Wellness|1301:Vitamin_Takviye|1307:Sporcu_Besini|1311:Medikal_Malzeme|1316:Goz_Sagligi|1320:Zayiflama_Diyet",
  // 1400 – Evcil_Hayvan
  "1400:Evcil_Hayvan|1401:Kedi_Urunleri|1407:Kopek_Urunleri|1414:Kus_Balik_Kucuk_Pet",
  // 1500 / 1600 – standalone top-level categories
  "1500:Hediye_Kupon",
  "1600:Finans_Sigorta",
].join("|")
/**
 * Converts a value to a string, mapping null and undefined to the empty string.
 *
 * @param {*} x - Any value.
 * @returns {string} "" for null/undefined, otherwise String(x).
 */
function s(x) {
  if (x === null || x === undefined) {
    return ""
  }
  return String(x)
}
/**
 * Cleans a list of tags: trims, lowercases, drops blanks, removes duplicates
 * (keeping first-seen order) and caps the result length.
 *
 * @param {*} tags - Expected to be an array of tags; any non-array yields [].
 * @param {number} [maxTags=5] - Maximum number of tags to return.
 * @returns {string[]} Unique, lowercased, trimmed tags.
 */
function normalizeTags(tags, maxTags = 5) {
  if (!Array.isArray(tags)) return []

  const unique = new Set()
  for (const tag of tags) {
    // String(null) / String(undefined) would otherwise leak the literal tags "null" / "undefined".
    if (tag == null) continue
    const cleaned = String(tag).trim().toLowerCase()
    if (cleaned) unique.add(cleaned)
  }

  // Cap AFTER de-duplicating so repeated tags do not use up slots meant for distinct ones.
  return [...unique].slice(0, maxTags)
}
/**
 * Extracts the model's text from a Responses API result and parses it as JSON.
 *
 * Uses `resp.output_text` when it is present. Otherwise it scans `resp.output` for the
 * first message content part that carries text, instead of assuming the text sits at
 * output[0].content[0]: the output list can start with non-message items (for example
 * a reasoning item).
 *
 * @param {object} resp - Response object returned by `client.responses.create`.
 * @returns {*} The parsed JSON value.
 * @throws {Error} "OpenAI response text missing" when no text can be found.
 * @throws {SyntaxError} When the text is not valid JSON (raised by JSON.parse).
 */
function parseOutputJson(resp) {
  const text = resp.output_text ?? findFirstOutputText(resp.output)
  if (!text) throw new Error("OpenAI response text missing")
  return JSON.parse(text)
}

// Returns the first non-empty text part found in a message item of `output`, or undefined.
function findFirstOutputText(output) {
  if (!Array.isArray(output)) return undefined
  for (const item of output) {
    // Untyped items are accepted for backward compatibility; typed non-message items are skipped.
    if (item?.type != null && item.type !== "message") continue
    if (!Array.isArray(item?.content)) continue
    for (const part of item.content) {
      const isTextPart = part?.type == null || part.type === "output_text"
      if (isTextPart && typeof part?.text === "string" && part.text !== "") {
        return part.text
      }
    }
  }
  return undefined
}
// Structured-output format sent as `text.format`: a strict JSON schema listing every
// field classifyDeal reads back from the model's reply.
const DEAL_CLASSIFICATION_FORMAT = {
  type: "json_schema",
  name: "deal_classification_v1",
  strict: true,
  schema: {
    type: "object",
    additionalProperties: false,
    required: [
      "best_category_id",
      "needs_review",
      "tags",
      "has_issue",
      "issue_type",
      "issue_reason",
    ],
    properties: {
      best_category_id: { type: "integer" },
      needs_review: { type: "boolean" },
      tags: { type: "array", items: { type: "string" }, maxItems: 5 },
      has_issue: { type: "boolean" },
      issue_type: {
        type: "string",
        enum: ["NONE", "PROFANITY", "PHONE_NUMBER", "PERSONAL_DATA", "SPAM", "OTHER"],
      },
      issue_reason: { type: ["string", "null"] },
    },
  },
}

// True when `id` is one of the "<id>:<Name>" entries of TAXONOMY_LINE.
// Evaluated per call; the list is small compared with the cost of the API request.
function isKnownCategoryId(id) {
  return TAXONOMY_LINE.split("|").some((entry) => Number.parseInt(entry, 10) === id)
}

/**
 * Asks the model to moderate and classify a single deal submission.
 *
 * The user message is the taxonomy line followed by the deal's title, description,
 * url and seller (null/undefined fields are sent as empty strings). The reply is
 * requested as strict JSON and then normalized.
 *
 * The schema only constrains `best_category_id` to be an integer, so an id that is not
 * present in TAXONOMY_LINE is replaced by 0 (the same fallback used for a missing id)
 * and the result is forced to `needs_review: true`.
 *
 * @param {object} deal
 * @param {*} deal.title
 * @param {*} deal.description
 * @param {*} deal.url
 * @param {*} deal.seller
 * @returns {Promise<{best_category_id: number, needs_review: boolean, has_issue: boolean,
 *   issue_type: string, issue_reason: (string|null), tags: string[]}>}
 * @throws {Error} When the response has no text, or {SyntaxError} when it is not valid JSON
 *   (both from parseOutputJson). Rejections from the API call propagate unchanged.
 */
async function classifyDeal({ title, description, url, seller }) {
  const userText = [
    TAXONOMY_LINE,
    `title: ${s(title)}`,
    `description: ${s(description)}`,
    `url: ${s(url)}`,
    `seller: ${s(seller)}`,
  ].join("\n")

  const resp = await client.responses.create({
    model: "gpt-5-nano",
    input: [
      { role: "system", content: SYSTEM_PROMPT },
      { role: "user", content: userText },
    ],
    text: { format: DEAL_CLASSIFICATION_FORMAT },
  })

  const parsed = parseOutputJson(resp)

  const categoryId = parsed.best_category_id ?? 0
  const categoryIsKnown = isKnownCategoryId(categoryId)

  return {
    best_category_id: categoryIsKnown ? categoryId : 0,
    // A deal without a valid category cannot be published as-is, so route it to review.
    needs_review: Boolean(parsed.needs_review) || !categoryIsKnown,
    has_issue: Boolean(parsed.has_issue),
    issue_type: parsed.issue_type ?? "NONE",
    issue_reason: parsed.issue_reason ?? null,
    tags: normalizeTags(parsed.tags),
  }
}
module.exports = { classifyDeal }