# LangaraCourseInfo
This project collects course and transfer information from Langara College, Vancouver, into an SQLite database.
Once built, the database weighs around 15 MB for all data (~250 MB with source HTML/PDFs), which should be lightweight enough for most uses.
The transfer agreement scraper currently takes an excruciating amount of time (approximately an hour); this will be improved with multithreading in the future.
# Collected Data
- Course Information: course description & other attributes
- Course Offerings: dating from 1999 to the present.
- Transfer Information: only active transfer agreements are collected.
# Table Definitions
- TransferInformation(subject, course_code, source, destination, credit, effective_start, effective_end)
- CourseInfo(subject, course_code, credits, title, description, lecture_hours, seminar_hours, lab_hours, AR, SC, HUM, LSC, SCI, SOC, UT)
- Sections(year, term, seats, waitlist, subject, course_code, crn, credits, additional_fees, repeat_limit)
- Schedules(year, term, crn, type, days, time, start_date, end_date, room, instructor)
- SemesterHTML(year, term, sectionHTML, catalogueHTML, attributeHTML)
- TransferPDF(subject, pdf)
# Stack
- SQLite
- Selenium
- Beautiful Soup
# Build
- `python -m build` Build the package.
- `twine upload -r pypi dist/*` Upload the package to PyPI.
Raw data
{
"_id": null,
"home_page": "",
"name": "LangaraCourseInfo",
"maintainer": "",
"docs_url": null,
"requires_python": ">=3.9",
"maintainer_email": "",
"keywords": "langara,college,scraper,data",
"author": "",
"author_email": "Anderson T <tseng.andersonn@gmail.com>",
"download_url": "https://files.pythonhosted.org/packages/f7/45/7478f8a4260acd64a0da68126e8acf48961e392f75e6fd50d23ba31e1cd3/LangaraCourseInfo-2.0.0.tar.gz",
"platform": null,
"description": "# LangaraCourseInfo\r\n\r\nThis project collects course and transfer information from Langara College, Vancouver, into an SQLite database.\r\n\r\nOnce built, the database weighs around 15 MB for all data (~250 MB with source HTML/PDFs), which should be lightweight enough for most uses.\r\n\r\nThe transfer agreement scraper currently takes an excruciating amount of time - approximately an hour - this will be improved with multithreading in the future.\r\n\r\n# Collected Data\r\n\r\n - Course Information: course description & other attributes\r\n - Course Offerings: dating from 1999 - present.\r\n - Transfer Information: only active transfer agreements are collected.\r\n\r\n# Table Definitions\r\n - TransferInformation(subject, course_code, source, destination, credit, effective_start, effective_end)\r\n - CourseInfo(subject, course_code, credits, title, description, lecture_hours, seminar_hours, lab_hours, AR, SC, HUM, LSC, SCI, SOC, UT)\r\n - Sections(year, term, seats, waitlist, subject, course_code, crn, credits, additional_fees, repeat_limit)\r\n - Schedules(year, term, crn, type, days,, time, start_date, end_date, room, instructor)\r\n\r\n - SemesterHTML(year, term, sectionHTML, catalogueHTML, attributeHTML)\r\n - TransferPDF(subject, pdf)\r\n\r\n# Stack \r\n - SQLite\r\n - Selenium\r\n - Beautifulsoup\r\n\r\n# Build\r\n- `python -m build` Build the package.\r\n- `twine upload -r pypi dist/*` Upload the package to pypi.\r\n",
"bugtrack_url": null,
"license": "",
"summary": "Langara Course Information Aggregator",
"version": "2.0.0",
"project_urls": {
"Homepage": "https://github.com/Highfire1/LangaraCourseInfo"
},
"split_keywords": [
"langara",
"college",
"scraper",
"data"
],
"urls": [
{
"comment_text": "",
"digests": {
"blake2b_256": "62a1be8603c5f5196d191dd1539fab666c4a3f0f3bd256b877de0498ba0f8f43",
"md5": "b6b03b5b0b94fd43d5313beca9e26fd2",
"sha256": "ecb2496e8b8d9e522f4ba44455cc7afcc98ed53a16f967aa3f28ad71d3b629fd"
},
"downloads": -1,
"filename": "LangaraCourseInfo-2.0.0-py3-none-any.whl",
"has_sig": false,
"md5_digest": "b6b03b5b0b94fd43d5313beca9e26fd2",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": ">=3.9",
"size": 26099,
"upload_time": "2023-11-27T22:41:19",
"upload_time_iso_8601": "2023-11-27T22:41:19.808026Z",
"url": "https://files.pythonhosted.org/packages/62/a1/be8603c5f5196d191dd1539fab666c4a3f0f3bd256b877de0498ba0f8f43/LangaraCourseInfo-2.0.0-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": "",
"digests": {
"blake2b_256": "f7457478f8a4260acd64a0da68126e8acf48961e392f75e6fd50d23ba31e1cd3",
"md5": "bd600ee680eb4efc396e790c9a70b52f",
"sha256": "f9d7a9a359177de9ff5e25dc17c4d330f34c73d01582e75df7cfa690f186610d"
},
"downloads": -1,
"filename": "LangaraCourseInfo-2.0.0.tar.gz",
"has_sig": false,
"md5_digest": "bd600ee680eb4efc396e790c9a70b52f",
"packagetype": "sdist",
"python_version": "source",
"requires_python": ">=3.9",
"size": 7485844,
"upload_time": "2023-11-27T22:41:22",
"upload_time_iso_8601": "2023-11-27T22:41:22.927803Z",
"url": "https://files.pythonhosted.org/packages/f7/45/7478f8a4260acd64a0da68126e8acf48961e392f75e6fd50d23ba31e1cd3/LangaraCourseInfo-2.0.0.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"upload_time": "2023-11-27 22:41:22",
"github": true,
"gitlab": false,
"bitbucket": false,
"codeberg": false,
"github_user": "Highfire1",
"github_project": "LangaraCourseInfo",
"travis_ci": false,
"coveralls": false,
"github_actions": false,
"requirements": [
{
"name": "bs4",
"specs": []
},
{
"name": "lxml",
"specs": []
},
{
"name": "faust-cchardet",
"specs": []
},
{
"name": "requests",
"specs": []
},
{
"name": "pdfquery",
"specs": []
},
{
"name": "pydantic",
"specs": []
},
{
"name": "six",
"specs": []
},
{
"name": "selenium",
"specs": []
},
{
"name": "pymupdf",
"specs": []
}
],
"lcname": "langaracourseinfo"
}