diff --git a/pulsar-ql/src/main/kotlin/ai/platon/pulsar/ql/h2/Queries.kt b/pulsar-ql/src/main/kotlin/ai/platon/pulsar/ql/h2/DomToH2Queries.kt similarity index 95% rename from pulsar-ql/src/main/kotlin/ai/platon/pulsar/ql/h2/Queries.kt rename to pulsar-ql/src/main/kotlin/ai/platon/pulsar/ql/h2/DomToH2Queries.kt index 161bed8c9..6b1bd08c1 100644 --- a/pulsar-ql/src/main/kotlin/ai/platon/pulsar/ql/h2/Queries.kt +++ b/pulsar-ql/src/main/kotlin/ai/platon/pulsar/ql/h2/DomToH2Queries.kt @@ -40,7 +40,7 @@ import kotlin.math.roundToInt import kotlin.reflect.full.memberProperties import kotlin.reflect.full.primaryConstructor -object Queries { +object DomToH2Queries { private val logger = getLogger(this::class) /** @@ -205,11 +205,24 @@ object Queries { } fun selectFirstOrNull(dom: ValueDom, cssQuery: String, transformer: (Element) -> O): O? { - return dom.element.selectFirstOrNull(cssQuery, transformer) + val result = dom.element.selectFirstOrNull(cssQuery, transformer) + if (result != null && result is Element) { + // feature: mark element matching query + // select first element matched + // result.attr("sf-match") + } + return result } fun selectNthOrNull(dom: ValueDom, cssQuery: String, n: Int, transform: (Element) -> O): O? { - return dom.element.select(cssQuery, n, 1) { transform(it) }.firstOrNull() + val result = dom.element.select(cssQuery, n, 1).firstOrNull() + if (result != null) { + // feature: mark element matching query + // select n-th element matched + // result.attr("sn-match") + return transform(result) + } + return null } fun getTexts(ele: Element, restrictCss: String, offset: Int, limit: Int): Collection { diff --git a/pulsar-ql/src/main/kotlin/ai/platon/pulsar/ql/h2/udfs/DomFunctionTables.kt b/pulsar-ql/src/main/kotlin/ai/platon/pulsar/ql/h2/udfs/DomFunctionTables.kt index 4d988fc13..ab1d62caa 100644 --- a/pulsar-ql/src/main/kotlin/ai/platon/pulsar/ql/h2/udfs/DomFunctionTables.kt +++ b/pulsar-ql/src/main/kotlin/ai/platon/pulsar/ql/h2/udfs/DomFunctionTables.kt @@ -12,12 +12,12 @@ import ai.platon.pulsar.ql.ResultSets import ai.platon.pulsar.ql.annotation.UDFGroup import ai.platon.pulsar.ql.annotation.UDFunction import ai.platon.pulsar.ql.h2.H2SessionFactory -import ai.platon.pulsar.ql.h2.Queries -import ai.platon.pulsar.ql.h2.Queries.toResultSet +import ai.platon.pulsar.ql.h2.DomToH2Queries +import ai.platon.pulsar.ql.h2.DomToH2Queries.toResultSet import ai.platon.pulsar.ql.h2.domValue import ai.platon.pulsar.ql.types.ValueDom import ai.platon.pulsar.ql.annotation.H2Context -import ai.platon.pulsar.ql.h2.Queries.toDOMResultSet +import ai.platon.pulsar.ql.h2.DomToH2Queries.toDOMResultSet import org.h2.jdbc.JdbcConnection import org.h2.tools.SimpleResultSet import org.h2.value.DataType @@ -46,7 +46,7 @@ object DomFunctionTables { return toResultSet("DOM", listOf()) } - val pages = Queries.loadAll(session, urls) + val pages = DomToH2Queries.loadAll(session, urls) val doms = pages.map { session.parseValueDom(it) } return toResultSet("DOM", doms) @@ -92,7 +92,7 @@ object DomFunctionTables { return toResultSet("LINK", listOf()) } - val links = Queries.loadAll(session, portalUrl, restrictCss, offset, limit, Queries::getLinks) + val links = DomToH2Queries.loadAll(session, portalUrl, restrictCss, offset, limit, DomToH2Queries::getLinks) return toResultSet("LINK", links) } @@ -106,7 +106,7 @@ object DomFunctionTables { return toResultSet("LINK", listOf()) } - return toResultSet("LINK", Queries.getLinks(dom.element, cssQuery, offset, limit)) + return toResultSet("LINK", DomToH2Queries.getLinks(dom.element, cssQuery, offset, limit)) } @JvmStatic @@ -187,7 +187,7 @@ object DomFunctionTables { return toResultSet("DOM", listOf()) } - val docs = Queries.loadOutPages(session, portal, restrictCss, offset, limit, normalize, ignoreQuery) + val docs = DomToH2Queries.loadOutPages(session, portal, restrictCss, offset, limit, normalize, ignoreQuery) .map { session.parse(it) } val elements = if (targetCss == ":root") { @@ -239,7 +239,7 @@ object DomFunctionTables { return toResultSet("DOM", listOf()) } - val pages = Queries.loadOutPages(session, portalUrl, restrictCss, offset, limit, normalize, ignoreQuery) + val pages = DomToH2Queries.loadOutPages(session, portalUrl, restrictCss, offset, limit, normalize, ignoreQuery) val docs = pages.map { session.parse(it) } @@ -302,7 +302,7 @@ object DomFunctionTables { /** * Notice: be careful use rs.addRow(*it) to make sure a vararg is passed into rs.addRow */ - dom.element.select(cssSelector, offset, limit) { Queries.getFeatureRow(it) } + dom.element.select(cssSelector, offset, limit) { DomToH2Queries.getFeatureRow(it) } .forEach { rs.addRow(*it) } return rs @@ -368,7 +368,7 @@ object DomFunctionTables { val drop = max(offset - 1, 0) result.sortedByDescending { it.getFeature(SIB) }.asSequence() .drop(drop).take(limit) - .map { Queries.getFeatureRow(it) } + .map { DomToH2Queries.getFeatureRow(it) } .forEach { rs.addRow(it) } return rs diff --git a/pulsar-ql/src/main/kotlin/ai/platon/pulsar/ql/h2/udfs/DomInlineSelectFunctions.kt b/pulsar-ql/src/main/kotlin/ai/platon/pulsar/ql/h2/udfs/DomInlineSelectFunctions.kt index f0c8702fe..ddaaabd1b 100644 --- a/pulsar-ql/src/main/kotlin/ai/platon/pulsar/ql/h2/udfs/DomInlineSelectFunctions.kt +++ b/pulsar-ql/src/main/kotlin/ai/platon/pulsar/ql/h2/udfs/DomInlineSelectFunctions.kt @@ -3,7 +3,7 @@ package ai.platon.pulsar.ql.h2.udfs import ai.platon.pulsar.dom.select.select2 import ai.platon.pulsar.ql.annotation.UDFGroup import ai.platon.pulsar.ql.annotation.UDFunction -import ai.platon.pulsar.ql.h2.Queries +import ai.platon.pulsar.ql.h2.DomToH2Queries import ai.platon.pulsar.ql.types.ValueDom import org.h2.value.ValueArray import org.h2.value.ValueString @@ -20,14 +20,14 @@ object DomInlineSelectFunctions { @JvmStatic fun inlineSelect(dom: ValueDom, cssQuery: String): ValueArray { val elements = dom.element.select2(cssQuery) - return Queries.toValueArray(elements) + return DomToH2Queries.toValueArray(elements) } @UDFunction(description = "Select all match elements by the given css query from a DOM and return the result as an array of DOMs") @JvmStatic fun inlineSelect(dom: ValueDom, cssQuery: String, offset: Int, limit: Int): ValueArray { val elements = dom.element.select2(cssQuery, offset, limit) - return Queries.toValueArray(elements) + return DomToH2Queries.toValueArray(elements) } @UDFunction(description = "Select all match elements by the given css query from a DOM and return the result as an array of DOMs") diff --git a/pulsar-ql/src/main/kotlin/ai/platon/pulsar/ql/h2/udfs/DomSelectFunctions.kt b/pulsar-ql/src/main/kotlin/ai/platon/pulsar/ql/h2/udfs/DomSelectFunctions.kt index 308a758ce..b694b1a09 100644 --- a/pulsar-ql/src/main/kotlin/ai/platon/pulsar/ql/h2/udfs/DomSelectFunctions.kt +++ b/pulsar-ql/src/main/kotlin/ai/platon/pulsar/ql/h2/udfs/DomSelectFunctions.kt @@ -4,14 +4,13 @@ import ai.platon.pulsar.common.RegexExtractor import ai.platon.pulsar.common.Strings import ai.platon.pulsar.dom.nodes.A_LABELS import ai.platon.pulsar.dom.nodes.node.ext.minimalHtml -import ai.platon.pulsar.dom.nodes.node.ext.namedRect import ai.platon.pulsar.dom.nodes.node.ext.slimHtml import ai.platon.pulsar.dom.select.appendSelectorIfMissing import ai.platon.pulsar.dom.select.selectFirstOrNull import ai.platon.pulsar.dom.select.select2 import ai.platon.pulsar.ql.annotation.UDFGroup import ai.platon.pulsar.ql.annotation.UDFunction -import ai.platon.pulsar.ql.h2.Queries +import ai.platon.pulsar.ql.h2.DomToH2Queries import ai.platon.pulsar.ql.types.ValueDom import org.h2.value.ValueArray import org.h2.value.ValueFloat @@ -30,7 +29,7 @@ object DomSelectFunctions { @UDFunction(description = "Select the all the elements from a DOM by the given css query") @JvmStatic fun selectAll(dom: ValueDom, cssQuery: String): ValueArray { - return Queries.select(dom, cssQuery) { it } + return DomToH2Queries.select(dom, cssQuery) { it } } @UDFunction(description = "Select the first element from a DOM by the given css query and return a DOM") @@ -48,56 +47,56 @@ object DomSelectFunctions { @UDFunction(description = "Select all elements from a DOM by the given css query and return the the element texts") @JvmStatic fun allTexts(dom: ValueDom, cssQuery: String): ValueArray { - return Queries.select(dom, cssQuery) { it.text() } + return DomToH2Queries.select(dom, cssQuery) { it.text() } } @UDFunction(description = "Select the first element from a DOM by the given css query and return the element text") @JvmStatic fun firstText(dom: ValueDom, cssQuery: String): String { - return Queries.selectFirstOrNull(dom, cssQuery) { it.text() } ?: "" + return DomToH2Queries.selectFirstOrNull(dom, cssQuery) { it.text() } ?: "" } @UDFunction(description = "Select the nth element from a DOM by the given css query and return the element text") @JvmStatic fun nthText(dom: ValueDom, cssQuery: String, n: Int): String { - return Queries.selectNthOrNull(dom, cssQuery, n) { it.text() } ?: "" + return DomToH2Queries.selectNthOrNull(dom, cssQuery, n) { it.text() } ?: "" } @UDFunction(description = "Select all elements from a DOM by the given css query and return the the element's own texts") @JvmStatic fun allOwnTexts(dom: ValueDom, cssQuery: String): ValueArray { - return Queries.select(dom, cssQuery) { it.ownText() } + return DomToH2Queries.select(dom, cssQuery) { it.ownText() } } @UDFunction(description = "Select the first element from a DOM by the given css query and return the element's own text") @JvmStatic fun firstOwnText(dom: ValueDom, cssQuery: String): String { - return Queries.selectFirstOrNull(dom, cssQuery) { it.ownText() } ?: "" + return DomToH2Queries.selectFirstOrNull(dom, cssQuery) { it.ownText() } ?: "" } @UDFunction(description = "Select the nth element from a DOM by the given css query and return the element's own text") @JvmStatic fun nthOwnText(dom: ValueDom, cssQuery: String, n: Int): String { - return Queries.selectNthOrNull(dom, cssQuery, n) { it.ownText() } ?: "" + return DomToH2Queries.selectNthOrNull(dom, cssQuery, n) { it.ownText() } ?: "" } @UDFunction(description = "Select all elements from a DOM by the given css query and return the the element whole texts") @JvmStatic fun wholeTexts(dom: ValueDom, cssQuery: String): ValueArray { - return Queries.select(dom, cssQuery) { it.wholeText() } + return DomToH2Queries.select(dom, cssQuery) { it.wholeText() } } @UDFunction(description = "Select the first element from a DOM by the given css query and return the element whole text") @JvmStatic fun firstWholeText(dom: ValueDom, cssQuery: String): String { - return Queries.selectFirstOrNull(dom, cssQuery) { it.wholeText() } ?: "" + return DomToH2Queries.selectFirstOrNull(dom, cssQuery) { it.wholeText() } ?: "" } @UDFunction(description = "Select the nth element from a DOM by the given css query and return the element whole text") @JvmStatic fun nthWholeText(dom: ValueDom, cssQuery: String, n: Int): String { - return Queries.selectNthOrNull(dom, cssQuery, n) { it.wholeText() } ?: "" + return DomToH2Queries.selectNthOrNull(dom, cssQuery, n) { it.wholeText() } ?: "" } @@ -109,38 +108,38 @@ object DomSelectFunctions { @UDFunction(description = "Select all elements from a DOM by the given css query and return the the element texts") @JvmStatic fun allSlimHtmls(dom: ValueDom, cssQuery: String): ValueArray { - return Queries.select(dom, cssQuery) { it.slimHtml } + return DomToH2Queries.select(dom, cssQuery) { it.slimHtml } } @UDFunction(description = "Select the first element from a DOM by the given css query and return the element text") @JvmStatic fun firstSlimHtml(dom: ValueDom, cssQuery: String): String { - return Queries.selectFirstOrNull(dom, cssQuery) { it.slimHtml } ?: "" + return DomToH2Queries.selectFirstOrNull(dom, cssQuery) { it.slimHtml } ?: "" } @UDFunction(description = "Select the nth element from a DOM by the given css query and return the element text") @JvmStatic fun nthSlimHtml(dom: ValueDom, cssQuery: String, n: Int): String { - return Queries.selectNthOrNull(dom, cssQuery, n) { it.slimHtml } ?: "" + return DomToH2Queries.selectNthOrNull(dom, cssQuery, n) { it.slimHtml } ?: "" } @UDFunction(description = "Select all elements from a DOM by the given css query and return the the element texts") @JvmStatic fun allMinimalHtmls(dom: ValueDom, cssQuery: String): ValueArray { - return Queries.select(dom, cssQuery) { it.minimalHtml } + return DomToH2Queries.select(dom, cssQuery) { it.minimalHtml } } @UDFunction(description = "Select the first element from a DOM by the given css query and return the element text") @JvmStatic fun firstMinimalHtml(dom: ValueDom, cssQuery: String): String { - return Queries.selectFirstOrNull(dom, cssQuery) { it.minimalHtml } ?: "" + return DomToH2Queries.selectFirstOrNull(dom, cssQuery) { it.minimalHtml } ?: "" } @UDFunction(description = "Select the nth element from a DOM by the given css query and return the element text") @JvmStatic fun nthMinimalHtml(dom: ValueDom, cssQuery: String, n: Int): String { - return Queries.selectNthOrNull(dom, cssQuery, n) { it.minimalHtml } ?: "" + return DomToH2Queries.selectNthOrNull(dom, cssQuery, n) { it.minimalHtml } ?: "" } @@ -149,7 +148,7 @@ object DomSelectFunctions { @JvmStatic @JvmOverloads fun allIntegers(dom: ValueDom, cssQuery: String, defaultValue: Int = 0): ValueArray { - return Queries.select(dom, cssQuery) { ValueInt.get(Strings.getFirstInteger(it.text(), defaultValue)) } + return DomToH2Queries.select(dom, cssQuery) { ValueInt.get(Strings.getFirstInteger(it.text(), defaultValue)) } } @UDFunction(description = "Select the first element from a DOM by the given css query " + @@ -174,7 +173,7 @@ object DomSelectFunctions { @JvmStatic @JvmOverloads fun allFloats(dom: ValueDom, cssQuery: String, defaultValue: Float = 0.0f): ValueArray { - return Queries.select(dom, cssQuery) { ValueFloat.get(Strings.getFirstFloatNumber(it.text(), defaultValue)) } + return DomToH2Queries.select(dom, cssQuery) { ValueFloat.get(Strings.getFirstFloatNumber(it.text(), defaultValue)) } } /** @@ -203,7 +202,7 @@ object DomSelectFunctions { @JvmStatic @JvmOverloads fun allAttrs(dom: ValueDom, cssQuery: String = ":root", attrName: String): ValueArray { - return Queries.select(dom, cssQuery) { it.attr(attrName) } + return DomToH2Queries.select(dom, cssQuery) { it.attr(attrName) } } @UDFunction(description = "Select the first element from a DOM by the given css query " + @@ -211,14 +210,14 @@ object DomSelectFunctions { @JvmStatic @JvmOverloads fun firstAttr(dom: ValueDom, cssQuery: String = ":root", attrName: String): String { - return Queries.selectFirstOrNull(dom, cssQuery) { it.attr(attrName) } ?: "" + return DomToH2Queries.selectFirstOrNull(dom, cssQuery) { it.attr(attrName) } ?: "" } @UDFunction(description = "Select the nth element from a DOM by the given css query " + "and return the attribute value associated by the attribute name") @JvmStatic fun nthAttr(dom: ValueDom, cssQuery: String, n: Int, attrName: String): String { - return Queries.selectNthOrNull(dom, cssQuery, n) { it.attr(attrName) } ?: "" + return DomToH2Queries.selectNthOrNull(dom, cssQuery, n) { it.attr(attrName) } ?: "" } @@ -228,7 +227,7 @@ object DomSelectFunctions { @JvmStatic @JvmOverloads fun allMultiAttrs(dom: ValueDom, cssQuery: String = ":root", attrNames: Array): ValueArray { - return Queries.select(dom, cssQuery) { ele -> attrNames.map { ele.attr(it) } } + return DomToH2Queries.select(dom, cssQuery) { ele -> attrNames.map { ele.attr(it) } } } @UDFunction(description = "Select the first element from a DOM by the given css query " + @@ -236,7 +235,7 @@ object DomSelectFunctions { @JvmStatic @JvmOverloads fun firstMultiAttrs(dom: ValueDom, cssQuery: String = ":root", attrNames: Array): List { - val result = Queries.selectFirstOrNull(dom, cssQuery) { ele -> + val result = DomToH2Queries.selectFirstOrNull(dom, cssQuery) { ele -> attrNames.map { ele.attr(it) } } ?: listOf() @@ -247,7 +246,7 @@ object DomSelectFunctions { "and return the attribute value associated by the attribute name") @JvmStatic fun nthMultiAttrs(dom: ValueDom, cssQuery: String, n: Int, attrNames: Array): List { - val result = Queries.selectNthOrNull(dom, cssQuery, n) { ele -> + val result = DomToH2Queries.selectNthOrNull(dom, cssQuery, n) { ele -> attrNames.map { ele.attr(it) } } ?: listOf() @@ -261,7 +260,7 @@ object DomSelectFunctions { @JvmOverloads fun allImgs(dom: ValueDom, cssQuery: String = ":root"): ValueArray { val q = appendSelectorIfMissing(cssQuery, "img") - return Queries.select(dom, q) { it.attr("abs:src") } + return DomToH2Queries.select(dom, q) { it.attr("abs:src") } } @UDFunction(description = "Select the first image element from a DOM by the given css query " + @@ -270,7 +269,7 @@ object DomSelectFunctions { @JvmOverloads fun firstImg(dom: ValueDom, cssQuery: String = ":root"): String { val q = appendSelectorIfMissing(cssQuery, "img") - return Queries.selectFirstOrNull(dom, q) { it.attr("abs:src") } ?: "" + return DomToH2Queries.selectFirstOrNull(dom, q) { it.attr("abs:src") } ?: "" } @UDFunction(description = "Select the nth image element from a DOM by the given css query " + @@ -278,7 +277,7 @@ object DomSelectFunctions { @JvmStatic fun nthImg(dom: ValueDom, cssQuery: String, n: Int): String { val q = appendSelectorIfMissing(cssQuery, "img") - return Queries.selectNthOrNull(dom, q, n) { it.attr("abs:src") } ?: "" + return DomToH2Queries.selectNthOrNull(dom, q, n) { it.attr("abs:src") } ?: "" } @UDFunction @@ -286,7 +285,7 @@ object DomSelectFunctions { @JvmOverloads fun allHrefs(dom: ValueDom, cssQuery: String = ":root"): ValueArray { val q = appendSelectorIfMissing(cssQuery, "a") - return Queries.select(dom, q) { it.attr("abs:href") } + return DomToH2Queries.select(dom, q) { it.attr("abs:href") } } @UDFunction(description = "Select the first anchor element from a DOM by the given css query " + @@ -295,7 +294,7 @@ object DomSelectFunctions { @JvmOverloads fun firstHref(dom: ValueDom, cssQuery: String = ":root"): String { val q = appendSelectorIfMissing(cssQuery, "a") - return Queries.selectFirstOrNull(dom, q) { it.attr("abs:href") } ?: "" + return DomToH2Queries.selectFirstOrNull(dom, q) { it.attr("abs:href") } ?: "" } @UDFunction(description = "Select the nth anchor element from a DOM by the given css query " + @@ -303,7 +302,7 @@ object DomSelectFunctions { @JvmStatic fun nthHref(dom: ValueDom, cssQuery: String, n: Int): String { val q = appendSelectorIfMissing(cssQuery, "a") - return Queries.selectNthOrNull(dom, q, n) { it.attr("abs:href") } ?: "" + return DomToH2Queries.selectNthOrNull(dom, q, n) { it.attr("abs:href") } ?: "" } @UDFunction(description = "Select the first element from a DOM by the given css query " + @@ -338,7 +337,7 @@ object DomSelectFunctions { @JvmStatic fun allRe1(dom: ValueDom, cssQuery: String, regex: String): ValueArray { val extractor = RegexExtractor() - return Queries.select(dom, cssQuery) { extractor.re1(it.text(), regex) } + return DomToH2Queries.select(dom, cssQuery) { extractor.re1(it.text(), regex) } } @UDFunction(description = "Select the first element from a DOM whose text matches the regex " + @@ -374,7 +373,7 @@ object DomSelectFunctions { @JvmStatic fun allRe2(dom: ValueDom, cssQuery: String, regex: String): ValueArray { val extractor = RegexExtractor() - return Queries.select(dom, cssQuery) { extractor.re2(it.text(), regex).toString() } + return DomToH2Queries.select(dom, cssQuery) { extractor.re2(it.text(), regex).toString() } } @UDFunction @@ -399,7 +398,7 @@ object DomSelectFunctions { @JvmStatic fun allRe2(dom: ValueDom, cssQuery: String, regex: String, keyGroup: Int, valueGroup: Int): ValueArray { val extractor = RegexExtractor() - return Queries.select(dom, cssQuery) { extractor.re2(it.text(), regex, keyGroup, valueGroup).toString() } + return DomToH2Queries.select(dom, cssQuery) { extractor.re2(it.text(), regex, keyGroup, valueGroup).toString() } } private fun text(dom: ValueDom): String { diff --git a/pulsar-ql/src/main/kotlin/ai/platon/pulsar/ql/h2/udfs/MetadataFunctionTables.kt b/pulsar-ql/src/main/kotlin/ai/platon/pulsar/ql/h2/udfs/MetadataFunctionTables.kt index 836f98462..b9270ff59 100644 --- a/pulsar-ql/src/main/kotlin/ai/platon/pulsar/ql/h2/udfs/MetadataFunctionTables.kt +++ b/pulsar-ql/src/main/kotlin/ai/platon/pulsar/ql/h2/udfs/MetadataFunctionTables.kt @@ -7,7 +7,7 @@ import ai.platon.pulsar.ql.annotation.H2Context import ai.platon.pulsar.ql.annotation.UDFGroup import ai.platon.pulsar.ql.annotation.UDFunction import ai.platon.pulsar.ql.h2.H2SessionFactory -import ai.platon.pulsar.ql.h2.Queries +import ai.platon.pulsar.ql.h2.DomToH2Queries import java.sql.Connection import java.sql.ResultSet import java.time.Duration @@ -21,7 +21,7 @@ object MetadataFunctionTables { @JvmStatic fun load(@H2Context conn: Connection, configuredUrl: String): ResultSet { val page = H2SessionFactory.getSession(conn).load(configuredUrl) - return Queries.toResultSet(page) + return DomToH2Queries.toResultSet(page) } @UDFunction(description = "Load a page specified by url from the database, " + @@ -34,6 +34,6 @@ object MetadataFunctionTables { loadOptions.expires = Duration.ZERO val page = H2SessionFactory.getSession(conn).load(url, loadOptions) - return Queries.toResultSet(page) + return DomToH2Queries.toResultSet(page) } } diff --git a/pulsar-ql/src/main/kotlin/ai/platon/pulsar/ql/h2/udfs/NewsFunctionTables.kt b/pulsar-ql/src/main/kotlin/ai/platon/pulsar/ql/h2/udfs/NewsFunctionTables.kt index a082f2e6b..7428a4031 100644 --- a/pulsar-ql/src/main/kotlin/ai/platon/pulsar/ql/h2/udfs/NewsFunctionTables.kt +++ b/pulsar-ql/src/main/kotlin/ai/platon/pulsar/ql/h2/udfs/NewsFunctionTables.kt @@ -13,7 +13,7 @@ import ai.platon.pulsar.ql.annotation.H2Context import ai.platon.pulsar.ql.annotation.UDFGroup import ai.platon.pulsar.ql.annotation.UDFunction import ai.platon.pulsar.ql.h2.H2SessionFactory -import ai.platon.pulsar.ql.h2.Queries +import ai.platon.pulsar.ql.h2.DomToH2Queries import ai.platon.pulsar.ql.h2.addColumn import ai.platon.pulsar.ql.types.ValueDom import org.h2.jdbc.JdbcConnection @@ -106,7 +106,7 @@ object NewsFunctionTables { val (url, args) = UrlUtils.splitUrlArgs(portalUrl) ss.load(url, ss.options(args)) - val docs = Queries.loadOutPages(ss, portalUrl, restrictCss, offset, limit, normalize, ignoreQuery) + val docs = DomToH2Queries.loadOutPages(ss, portalUrl, restrictCss, offset, limit, normalize, ignoreQuery) .asSequence() .map { ss.parse(it) } val doms = docs.map { ValueDom.get(it.document) } diff --git a/pulsar-ql/src/test/kotlin/ai/platon/pulsar/ql/TestQueries.kt b/pulsar-ql/src/test/kotlin/ai/platon/pulsar/ql/TestDomToH2Queries.kt similarity index 85% rename from pulsar-ql/src/test/kotlin/ai/platon/pulsar/ql/TestQueries.kt rename to pulsar-ql/src/test/kotlin/ai/platon/pulsar/ql/TestDomToH2Queries.kt index 63d87a1c0..bf040a86b 100644 --- a/pulsar-ql/src/test/kotlin/ai/platon/pulsar/ql/TestQueries.kt +++ b/pulsar-ql/src/test/kotlin/ai/platon/pulsar/ql/TestDomToH2Queries.kt @@ -1,7 +1,7 @@ package ai.platon.pulsar.ql import ai.platon.pulsar.persist.WebPage -import ai.platon.pulsar.ql.h2.Queries +import ai.platon.pulsar.ql.h2.DomToH2Queries import java.util.concurrent.Executors import kotlin.test.Test import kotlin.test.assertEquals @@ -11,7 +11,7 @@ import kotlin.test.assertTrue * Created by vincent on 17-7-29. * Copyright @ 2013-2023 Platon AI. All rights reserved. */ -class TestQueries: TestBase() { +class TestDomToH2Queries: TestBase() { private val portalUrl = "https://www.amazon.com/Best-Sellers/zgbs" private val args = "-i 10d -ii 50d -ol a[href~=/dp/] -ignoreFailure" @@ -24,7 +24,7 @@ class TestQueries: TestBase() { // val restrictCss = "#content ul li a" val limit = 20 - val pages = Queries.loadOutPages(session, url, restrictCss, 1, limit) + val pages = DomToH2Queries.loadOutPages(session, url, restrictCss, 1, limit) pages.map { it.url }.distinct().forEachIndexed { i, url -> println("$i.\t$url") } assertTrue("Page size: " + pages.size) { pages.size <= limit } } @@ -37,7 +37,7 @@ class TestQueries: TestBase() { val executor = Executors.newWorkStealingPool() val futures = IntRange(1, parallel).map { executor.submit> { - val pages = Queries.loadOutPages(session, url, restrictCss, 1, limit) + val pages = DomToH2Queries.loadOutPages(session, url, restrictCss, 1, limit) pages.map { it.url }.distinct().forEachIndexed { i, url -> println("$i.\t$url") } assertTrue("Page size: " + pages.size) { pages.size <= limit } pages