From 53c65d1541cbf077ab17211844428e88ea298415 Mon Sep 17 00:00:00 2001 From: chee Date: Fri, 22 Sep 2017 22:09:11 +0100 Subject: [PATCH] commit the initial version of the library --- index.js | 130 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 index.js diff --git a/index.js b/index.js new file mode 100644 index 0000000..1d1db0b --- /dev/null +++ b/index.js @@ -0,0 +1,130 @@ +const scrape = require('scrape-it') + +const ROOT = 'http://jisho.org/search' + +function removeFurigana (dom) { + return dom.not('.furigana').text().trim() +} + +function parseAudio (dom) { + return [].reduce.call(dom, (result, source) => { + const type = source + .attribs + .type + .replace('audio/', '') + result[type] = source.attribs.src + return result + }, {}) +} + +function parseAbstractUrl (dom) { + return dom.find('a').attr('href') +} + +function parseAbstract (dom) { + const readMore = dom.find('a') + readMore.remove() + return dom.text() +} + +function parseListToLowerCase (dom) { + return [].map.call(dom, + item => dom.constructor(item).text().toLowerCase() + ) +} + +function getConfig (options) { + return { + words: { + listItem: '#primary .concept_light', + data: { + furigana: '.concept_light-representation .furigana', + japanese: '.text', + tags: { + selector: '.concept_light-tag', + how: parseListToLowerCase + }, + meanings: { + listItem: '.meaning-wrapper', + data: { + english: '.meaning-meaning', + sentences: { + listItem: '.sentence', + data: { + japanese: '.japanese', + english: '.english' + } + } + } + }, + audio: { + selector: 'audio source', + how: parseAudio + } + } + }, + sentences: { + listItem: '#secondary .sentence', + tags: { + listItem: '.concept_light-tag' + }, + data: { + japanese: { + selector: '.japanese_sentence', + how: removeFurigana + }, + english: '.english_sentence' + } + }, + names: { + listItem: '#secondary .names .concept_light', + tags: { + listItem: '.concept_light-tag' + }, + data: { + japanese: '.japanese', + meanings: { + listItem: '.meaning-wrapper', + data: { + english: '.meaning-meaning', + url: { + selector: '.meaning-abstract', + how: parseAbstractUrl + }, + abstract: { + selector: '.meaning-abstract', + how: parseAbstract + } + } + } + } + } + } +} + +module.exports = function get (term, options, callback) { + const encodedTerm = encodeURIComponent(term) + + if (typeof options === 'function' && callback == null) { + callback = options + options = null + } + + return new Promise((resolve, reject) => { + const finish = (error, result) => { + if (callback) { + callback(error, result) + } + + return error + ? reject(error) + : resolve(result) + } + + scrape( + `${ROOT}/${encodedTerm}`, + getConfig(options), + finish + ) + }) +}