nahiarhdNLP 1.3.2

- **Summary:** Advanced Indonesian Natural Language Processing Library
- **License:** MIT
- **Requires Python:** >=3.8
- **Uploaded:** 2025-07-28 08:24:19
- **Keywords:** nlp, indonesian, natural-language-processing, text-processing, bahasa-indonesia

# nahiarhdNLP - Indonesian Natural Language Processing Library

An Indonesian Natural Language Processing library offering text preprocessing, slang normalization, emoji conversion, spelling correction, and various other text-processing utilities.

## 🚀 Installation

```bash
pip install nahiarhdNLP
```
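
A minimal quick start (a sketch built from the pipeline functions documented below; the exact output depends on the bundled slang and spelling datasets):

```python
from nahiarhdNLP.preprocessing import Pipeline, remove_url, replace_spell_corrector, to_lowercase

# Chain a few preprocessing steps and run them over a single string
quick = Pipeline(remove_url, replace_spell_corrector, to_lowercase)
print(quick.process("Gw lg di https://example.com"))
```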

## 📦 Importing the Library

```python
# Import functions from the preprocessing module
from nahiarhdNLP.preprocessing import (
    # Basic cleaning functions
    remove_html, remove_emoji, remove_url, remove_mentions, remove_hashtags,
    remove_numbers, remove_punctuation, remove_extra_spaces,
    remove_special_chars, remove_whitespace, to_lowercase,
    # Normalization and correction functions
    replace_spell_corrector, replace_repeated_chars,
    # Emoji functions
    emoji_to_words, words_to_emoji,
    # Linguistic functions
    remove_stopwords, stem_text, tokenize,
    # Pipeline functions
    pipeline, preprocess, Pipeline
)

# Import classes for advanced usage
from nahiarhdNLP.preprocessing import (
    TextCleaner, SpellCorrector, StopwordRemover,
    Stemmer, EmojiConverter, Tokenizer
)

# Import the dataset loader
from nahiarhdNLP.datasets import DatasetLoader
```

## 📋 Usage Examples

### 1. 🧹 TextCleaner - Cleaning Text

```python
from nahiarhdNLP.preprocessing import TextCleaner

cleaner = TextCleaner()

# Remove HTML tags
html_text = "website <a href='https://google.com'>google</a>"
clean_result = cleaner.clean_html(html_text)
print(clean_result)
# Output: "website google"

# Remove URLs
url_text = "kunjungi https://google.com sekarang!"
clean_result = cleaner.clean_urls(url_text)
print(clean_result)
# Output: "kunjungi sekarang!"

# Remove mentions
mention_text = "Halo @user123 apa kabar?"
clean_result = cleaner.clean_mentions(mention_text)
print(clean_result)
# Output: "Halo apa kabar?"

# Remove emoji
emoji_text = "Halo dunia 😀😁 apa kabar? 🎉"
clean_result = cleaner.clean_emoji(emoji_text)
print(clean_result)
# Output: "Halo dunia apa kabar?"
```

### 2. ✏️ SpellCorrector - Spelling Correction & Slang Normalization

```python
from nahiarhdNLP.preprocessing import SpellCorrector

spell = SpellCorrector()

# Correct a misspelled word
word = "sya"
corrected = spell.correct_word(word)
print(corrected)
# Output: "saya"

# Correct a full sentence (including slang normalization)
sentence = "sya suka mkn nasi"
corrected = spell.correct_sentence(sentence)
print(corrected)
# Output: "saya suka makan nasi"

# Slang normalization
slang_text = "gw lg di rmh"
normalized = spell.correct_sentence(slang_text)
print(normalized)
# Output: "gue lagi di rumah"
```

### 3. 🚫 StopwordRemover - Removing Stopwords

```python
from nahiarhdNLP.preprocessing import StopwordRemover

stopword = StopwordRemover()
stopword._load_data()  # Load the stopword dataset

# Remove stopwords
text = "saya suka makan nasi goreng"
result = stopword.remove_stopwords(text)
print(result)
# Output: "suka makan nasi goreng"

# Check whether a word is a stopword
is_stop = stopword.is_stopword("adalah")
print(is_stop)  # True
```

### 4. 😀 EmojiConverter - Emoji Conversion

```python
from nahiarhdNLP.preprocessing import EmojiConverter

emoji = EmojiConverter()
emoji._load_data()  # Load the emoji dataset

# Emoji to text
emoji_text = "😀 😂 😍"
text_result = emoji.emoji_to_text_convert(emoji_text)
print(text_result)
# Output: "wajah_gembira wajah_gembira_berurai_air_mata wajah_tersenyum_lebar_bermata_hati"

# Text to emoji
text = "wajah_gembira"
emoji_result = emoji.text_to_emoji_convert(text)
print(emoji_result)
# Output: "😀"
```

### 5. 🔪 Tokenizer - Tokenization

```python
from nahiarhdNLP.preprocessing import Tokenizer

tokenizer = Tokenizer()

# Tokenize text
text = "Saya suka makan nasi"
tokens = tokenizer.tokenize(text)
print(tokens)
# Output: ['Saya', 'suka', 'makan', 'nasi']
```

### 6. 🌿 Stemmer - Stemming

```python
from nahiarhdNLP.preprocessing import Stemmer

try:
    stemmer = Stemmer()
    text = "bermain-main dengan senang"
    result = stemmer.stem(text)
    print(result)
    # Output: "main main dengan senang"
except ImportError:
    print("Install Sastrawi with: pip install Sastrawi")
```

### 7. 🛠️ Individual Functions

```python
from nahiarhdNLP.preprocessing import (
    remove_html, remove_emoji, remove_url, remove_mentions, remove_hashtags,
    remove_numbers, remove_punctuation, remove_extra_spaces,
    remove_special_chars, remove_whitespace, to_lowercase,
    replace_spell_corrector, replace_repeated_chars,
    emoji_to_words, words_to_emoji, remove_stopwords,
    stem_text, tokenize
)

# 🧹 BASIC CLEANING FUNCTIONS

# Remove HTML tags
html_text = "website <a href='https://google.com'>google</a>"
clean_result = remove_html(html_text)
print(clean_result)
# Output: "website google"

# Remove emoji
emoji_text = "Halo dunia 😀😁 apa kabar? 🎉"
clean_result = remove_emoji(emoji_text)
print(clean_result)
# Output: "Halo dunia apa kabar?"

# Remove URLs
url_text = "kunjungi https://google.com sekarang!"
clean_result = remove_url(url_text)
print(clean_result)
# Output: "kunjungi sekarang!"

# Remove mentions (@username)
mention_text = "Halo @user123 dan @admin apa kabar?"
clean_result = remove_mentions(mention_text)
print(clean_result)
# Output: "Halo dan apa kabar?"

# Remove hashtags (#tag)
hashtag_text = "Hari ini #senin #libur #weekend"
clean_result = remove_hashtags(hashtag_text)
print(clean_result)
# Output: "Hari ini"

# ✨ NORMALIZATION AND CORRECTION FUNCTIONS

# Slang normalization and spelling correction
slang_text = "emg siapa yg nanya?"
normal_text = replace_spell_corrector(slang_text)
print(normal_text)
# Output: "memang siapa yang bertanya?"

# Collapse word elongation (repeated characters)
elongation_text = "kenapaaa???"
clean_result = replace_repeated_chars(elongation_text)
print(clean_result)
# Output: "kenapaa??"

# 😀 EMOJI FUNCTIONS

# Convert emoji to words
emoji_text = "emoji 😀😁"
text_result = emoji_to_words(emoji_text)
print(text_result)
# Output: "emoji wajah_gembira wajah_gembira_dengan_mata_bahagia"

# Convert words to emoji
text_to_emoji = "emoji wajah_gembira"
emoji_result = words_to_emoji(text_to_emoji)
print(emoji_result)
# Output: "emoji 😀"

# 🔬 LINGUISTIC FUNCTIONS

# Remove stopwords
stopword_text = "siapa yang suruh makan?!!"
clean_result = remove_stopwords(stopword_text)
print(clean_result)
# Output: "suruh makan?!!"

# Stem text (requires Sastrawi)
try:
    stem_text_input = "bermain-main dengan senang"
    stemmed = stem_text(stem_text_input)
    print(stemmed)
    # Output: "main main dengan senang"
except ImportError:
    print("Install Sastrawi: pip install Sastrawi")

# Tokenize text
tokenize_text = "Saya suka makan nasi"
tokens = tokenize(tokenize_text)
print(tokens)
# Output: ['Saya', 'suka', 'makan', 'nasi']
```

### 8. 🔀 Pipeline - Preprocessing in One Pass

Pipeline supports **two usage styles**:

#### A. 🚀 Pipeline with Functions (Simple & Clean)

```python
from nahiarhdNLP.preprocessing import (
    Pipeline, remove_html, remove_url, remove_mentions, remove_hashtags,
    remove_emoji, replace_spell_corrector, to_lowercase, tokenize
)

# Pass the functions you want to use directly
pipeline = Pipeline(remove_html, remove_url, remove_mentions)
result = pipeline.process("Hello <b>world</b> @user https://example.com")
print(result)
# Output: "Hello world"

# Pick whichever functions you need
pipeline = Pipeline(remove_url, replace_spell_corrector, to_lowercase)
result = pipeline.process("Halooo https://google.com gw lg nyari info")
print(result)
# Output: "halooo gue lagi mencari info"

# A Pipeline instance is callable like a function
result = pipeline("Test text lainnya")
print(result)

# Example for social media text
social_pipeline = Pipeline(
    remove_mentions,
    remove_hashtags,
    remove_emoji,
    remove_url,
    replace_spell_corrector,
    to_lowercase
)
result = social_pipeline.process("Halooo @user #trending 😀 https://example.com gw lg nyari info")
print(result)
# Output: "halooo gue lagi mencari info"

# Tokenization works inline as well
token_pipeline = Pipeline(remove_url, to_lowercase, tokenize)
tokens = token_pipeline.process("Hello https://google.com World")
print(tokens)  # ['hello', 'world']
```
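
Because the functions mode simply chains callables, a plain Python function can in principle be mixed in with the built-ins (an assumption about the functions mode, not a documented guarantee; `strip_rt` below is a hypothetical helper):

```python
from nahiarhdNLP.preprocessing import Pipeline, remove_url, to_lowercase

def strip_rt(text: str) -> str:
    # Hypothetical helper: drop a leading "RT " retweet marker
    return text[3:] if text.startswith("RT ") else text

pipe = Pipeline(strip_rt, remove_url, to_lowercase)
print(pipe.process("RT Cek https://example.com sekarang"))
# Roughly: "cek sekarang" (exact spacing depends on remove_url)
```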

#### B. 🎯 Pipeline with a Config Dictionary (Advanced)

```python
from nahiarhdNLP.preprocessing import Pipeline

# Config dictionary for fine-grained control
config = {
    "remove_emoji": True,
    "remove_url": True,
    "remove_mentions": True,
    "remove_hashtags": True,
    "remove_numbers": True,
    "replace_spell_corrector": True,
    "to_lowercase": True,
    "remove_punctuation": True,
}

pipeline = Pipeline(config)
result = pipeline.process("Halooo @user123 #trending https://example.com gw lg nyari info pnting 😀!!! 123")
print(result)
# Output: "halo gue lagi mencari info penting"

# Pipeline with tokenization
tokenize_config = {
    "remove_url": True,
    "remove_mentions": True,
    "replace_spell_corrector": True,
    "to_lowercase": True,
    "tokenize": True,
}

pipe = Pipeline(tokenize_config)
result = pipe.process("gw suka makan nasi @user")
print(result)
# Output: ['gue', 'suka', 'makan', 'nasi']

# Advanced features for config mode
print("Current config:", pipeline.get_config())
print("Enabled steps:", pipeline.get_enabled_steps())

# Update configuration
pipeline.update_config({"tokenize": True, "remove_stopwords": True})
```

#### C. 🔧 Helper Function pipeline()

```python
from nahiarhdNLP.preprocessing import pipeline

# One-shot preprocessing with a config dictionary
config = {"remove_url": True, "replace_spell_corrector": True, "to_lowercase": True}
result = pipeline("Gw lg browsing https://google.com", config)
print(result)
# Output: "gue lagi rosin"
```

#### 📝 Available Functions for Pipeline

```text
# Basic cleaning
remove_html, remove_emoji, remove_url, remove_mentions, remove_hashtags,
remove_numbers, remove_punctuation, remove_special_chars,
remove_whitespace, remove_extra_spaces

# Text transformation
to_lowercase, replace_repeated_chars, replace_spell_corrector

# Emoji handling
emoji_to_words, words_to_emoji

# Linguistic processing
remove_stopwords, stem_text, tokenize
```

### 9. 🎛️ Preprocess Function (Backward Compatibility)

```python
from nahiarhdNLP.preprocessing import preprocess

# Preprocessing with explicit parameters
result = preprocess(
    "Halooo @user!!! 123 😀",
    remove_emoji=True,
    remove_mentions=True,
    remove_numbers=True,
    remove_punctuation=True,
    replace_repeated_chars=True,
    to_lowercase=True,
    replace_spell_corrector=False,
)
print(result)
# Output: "haloo"
```

### 10. 📊 Dataset Loader

```python
from nahiarhdNLP.datasets import DatasetLoader

loader = DatasetLoader()

# Load stopwords from the local CSV
stopwords = loader.load_stopwords_dataset()
print(f"Number of stopwords: {len(stopwords)}")

# Load the slang dictionary from the local CSV
slang_dict = loader.load_slang_dataset()
print(f"Number of slang entries: {len(slang_dict)}")

# Load the emoji dictionary from the local CSV
emoji_dict = loader.load_emoji_dataset()
print(f"Number of emoji: {len(emoji_dict)}")

# Load the wordlist from the local JSON
wordlist = loader.load_wordlist_dataset()
print(f"Number of words: {len(wordlist)}")
```

> **Note:** All datasets (stopwords, slang, emoji, wordlist) are loaded directly from CSV/JSON files in the `nahiarhdNLP/datasets/` folder; nothing is downloaded from an external source.
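
Because the data ships inside the package, its presence can be verified without any network access (a sketch assuming the `nahiarhdNLP/datasets/` layout shown under Module Structure below; `importlib.resources.files` requires Python 3.9+):

```python
from importlib.resources import files

# Locate the packaged stopword CSV inside the installed distribution
stopword_csv = files("nahiarhdNLP") / "datasets" / "stop_word.csv"
print(stopword_csv.is_file())  # True once nahiarhdNLP is installed
```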

## 🔥 Demo Script

To see every feature of the library in action:

```bash
python -m nahiarhdNLP.demo
```

The demo walks through:

- ✅ All individual utility functions
- ✅ The class-based approach
- ✅ The pipeline system (functions & config)
- ✅ Advanced pipeline features
- ✅ Error handling and troubleshooting

## 🚨 Error Handling

```python
try:
    from nahiarhdNLP.preprocessing import SpellCorrector
    spell = SpellCorrector()
    result = spell.correct_sentence("test")
except ImportError:
    print("Package nahiarhdNLP is not installed")
    print("Install it with: pip install nahiarhdNLP")
except Exception as e:
    print(f"Error: {e}")
```

## 💡 Usage Tips

1. **For simple preprocessing**: use `Pipeline(function1, function2, ...)` and pass the functions directly (see the sketch after this list)
2. **For fine-grained control**: use `Pipeline(config_dict)` or `preprocess()` with boolean parameters
3. **For full control**: use the individual classes (`TextCleaner`, `SpellCorrector`, etc.)
4. **For spell correction + slang**: use `SpellCorrector`, which combines both features
5. **For removing emoji**: use `remove_emoji()` or set `remove_emoji=True` in Pipeline/preprocess
6. **For stemming**: install Sastrawi first: `pip install Sastrawi`
7. **For loading datasets**: use `DatasetLoader` from `nahiarhdNLP.datasets`
8. **For class initialization**: call `_load_data()` on classes that need a dataset
9. **Pipeline design**: `Pipeline(remove_url, to_lowercase)` reads more clearly than a config dictionary
10. **Function chaining**: a Pipeline can be called like a function: `pipeline("text")`
11. **Demo testing**: run `python -m nahiarhdNLP.demo` to see every feature in action
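
Putting tips 1, 4, and 10 together, a compact social-media cleanup could look like this (a sketch composed only of functions documented above):

```python
from nahiarhdNLP.preprocessing import (
    Pipeline, remove_mentions, remove_url, remove_emoji,
    replace_spell_corrector, to_lowercase,
)

# Strip noise first, then normalize slang/spelling, then lowercase
cleanup = Pipeline(remove_mentions, remove_url, remove_emoji,
                   replace_spell_corrector, to_lowercase)

# A Pipeline is callable, so no explicit .process() is needed
print(cleanup("Makasih @admin 😀 infonya di https://example.com"))
```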

## ⚡ Performance & Datasets

nahiarhdNLP uses **local datasets** that ship with the package:

- **Stopwords**: `stop_word.csv` (788 words)
- **Slang Dictionary**: `slang.csv` (15,675 pairs)
- **Emoji Mapping**: `emoji.csv` (3,530 emoji)
- **Wordlist**: `wordlist.json` (Indonesian word list)
- **KBBI Dictionary**: `kata_dasar_kbbi.csv` (28,527 words)
- **Supplementary Dictionary**: `kamus.txt` (30,871 words)

All datasets live in the `nahiarhdNLP/datasets/` folder and are accessed through `DatasetLoader`.
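
The loaders are also handy for ad-hoc lookups, such as checking a single slang token (this assumes `load_slang_dataset()` returns a dict-like mapping, which the `len()` usage above suggests but does not guarantee):

```python
from nahiarhdNLP.datasets import DatasetLoader

loader = DatasetLoader()
slang = loader.load_slang_dataset()
# Assumption: slang behaves like a plain dict, e.g. {"gw": "gue", ...}
print(slang.get("gw"))
```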

## 📦 Dependencies

This package requires:

- `pandas` - loading and processing the CSV/JSON datasets
- `Sastrawi` - stemming (optional)
- `rich` - formatted output in the demo (optional)
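
Since `Sastrawi` is optional, stemming code can degrade gracefully when it is missing (a sketch mirroring the `ImportError` handling shown in the stemming examples above):

```python
try:
    from nahiarhdNLP.preprocessing import stem_text
    print(stem_text("bermain-main dengan senang"))
except ImportError:
    # Sastrawi is optional; install it with: pip install Sastrawi
    print("Stemming unavailable without Sastrawi")
```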

## 🔧 Module Structure

```text
nahiarhdNLP/
├── datasets/
│   ├── loaders.py          # DatasetLoader class
│   ├── emoji.csv           # Emoji dataset (3,530 entries)
│   ├── slang.csv           # Slang dataset (15,675 entries)
│   ├── stop_word.csv       # Stopword dataset (788 entries)
│   ├── wordlist.json       # Wordlist dataset
│   ├── kata_dasar_kbbi.csv # KBBI dataset (28,527 entries)
│   └── kamus.txt           # Supplementary dictionary (30,871 entries)
├── preprocessing/
│   ├── cleaning/
│   │   └── text_cleaner.py # TextCleaner class
│   ├── linguistic/
│   │   ├── stemmer.py      # Stemmer class
│   │   └── stopwords.py    # StopwordRemover class
│   ├── normalization/
│   │   ├── emoji.py        # EmojiConverter class
│   │   └── spell_corrector.py # SpellCorrector class
│   ├── tokenization/
│   │   └── tokenizer.py    # Tokenizer class
│   └── utils.py            # Individual utility functions & Pipeline
└── demo.py                 # Usage demo
```

## 🆕 Changelog Version 1.5.0

- 🚀 **[NEW]** Added a `remove_emoji()` function for stripping emoji from text
- ✅ **[NEW]** TextCleaner now has a `clean_emoji()` method for removing emoji
- ✅ **[NEW]** Pipeline supports a "remove_emoji" config key for emoji removal
- ✅ **[NEW]** The preprocess function supports a `remove_emoji=True/False` parameter
- ✅ **[FIXED]** Demo script updated with emoji-removal examples
- ✅ **[FIXED]** Complete documentation for the emoji-removal feature
- 🚀 **[MAJOR]** Pipeline now supports two modes: functions and a config dictionary
- ✅ **[NEW]** Pipeline with functions: `Pipeline(remove_url, to_lowercase)`
- ✅ **[NEW]** Pipeline with a config: `Pipeline({"remove_url": True, "to_lowercase": True})`
- ✅ **[NEW]** Advanced pipeline features: `get_config()`, `get_enabled_steps()`, `update_config()`
- ✅ **[FIXED]** The `pipeline(text, config)` function now works with a config dictionary
- ✅ **[FIXED]** TextCleaner now has a correct `clean_html()` method
- ✅ **[FIXED]** SpellCorrector demo fixed with proper instantiation
- ✅ **[FIXED]** Demo script runs cleanly without errors
- ✅ **[FIXED]** Documentation is accurate and matches the implementation
- ✅ **[FIXED]** Consistent function names: `replace_spell_corrector`, `replace_repeated_chars`
- ✅ **[FIXED]** Backward compatibility via the `preprocess()` function
- ✅ Spell correction and slang normalization merged into `SpellCorrector`
- ✅ All datasets use local files (CSV/JSON)
- ✅ Better-organized structure with classes and functions separated
- ✅ Added `DatasetLoader` for centralized dataset management
- ✅ Complete dataset suite of 6 files (emoji, slang, stopwords, wordlist, KBBI, supplementary dictionary)

## 🐛 Troubleshooting

**Errors when loading a dataset:**

```python
# Make sure to call _load_data() on classes that need a dataset
stopword = StopwordRemover()
stopword._load_data()  # Important!
```

**Sastrawi not found:**

```bash
pip install Sastrawi
```

**pandas not found:**

```bash
pip install pandas
```

**Testing all features:**

```bash
python -m nahiarhdNLP.demo
```

## 📄 License

MIT License

## 👨‍💻 Author

Raihan Hidayatullah Djunaedi [raihanhd.dev@gmail.com](mailto:raihanhd.dev@gmail.com)

---

For complete usage examples, see `demo.py` in this repository or run `python -m nahiarhdNLP.demo`.

            
