Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
bb6b46b
feat(): create map component, add area filtering to the job config
strech345 Feb 21, 2026
21db416
feat(): filter listings by area filter
strech345 Feb 22, 2026
2739fab
chore(): cleanup
strech345 Feb 22, 2026
ae975d9
feat(): solve feedback
strech345 Mar 1, 2026
9578192
feat(): solve most providers
strech345 Mar 1, 2026
a134da3
feat(): solve maybe other providers
strech345 Mar 1, 2026
85643fc
feat(): add specFilter config, also add rooms to listing
strech345 Mar 7, 2026
067682b
feat(): change tests
strech345 Mar 7, 2026
a7c7273
feat(): fix kleinanzeigen parser
strech345 Mar 7, 2026
82d08d7
feat(): add spec filter switch for listing overviiews
strech345 Mar 8, 2026
8b368a1
feat(): add rooms and size to the overview and detail of a listing
strech345 Mar 8, 2026
18cbe24
feat(): rem label
strech345 Mar 8, 2026
fdc7a7d
Merge remote-tracking branch 'upstream/master' into feature/spec_filter
strech345 Mar 8, 2026
fed77be
feat(): add types, update providers, they now return specs as numbers
strech345 Mar 16, 2026
27310b0
Merge branch 'master' into feature/spec_filter
strech345 Mar 16, 2026
153c63c
feat(): add jsonconfig to enable type checks
strech345 Mar 16, 2026
47e3bb9
Merge branch 'master' into feature/spec_filter
strech345 Mar 23, 2026
0074053
feat: add type for prividerConfig, add fieldNames per provider
strech345 Mar 23, 2026
7f9ce14
feat: fix tests, provider, add formatListing
strech345 Mar 23, 2026
2dedcbc
chore: remov duplicates
strech345 Mar 23, 2026
3a246ff
Merge branch 'master' into feature/spec_filter
strech345 Mar 23, 2026
2bf368d
feat(): fix tests
strech345 Mar 23, 2026
58c91f9
feat: fix immoscout
strech345 Mar 23, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions jsconfig.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"compilerOptions": {
"module": "NodeNext",
"moduleResolution": "NodeNext",
"target": "ESNext",
"checkJs": true,
"allowJs": true,
"noEmit": true,
"strict": false
},
"exclude": ["node_modules", "ui"]
}
165 changes: 95 additions & 70 deletions lib/FredyPipelineExecutioner.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,22 +19,14 @@ import { distanceMeters } from './services/listings/distanceCalculator.js';
import { getUserSettings } from './services/storage/settingsStorage.js';
import { updateListingDistance } from './services/storage/listingsStorage.js';
import booleanPointInPolygon from '@turf/boolean-point-in-polygon';
import { formatListing } from './utils/formatListing.js';

/**
* @typedef {Object} Listing
* @property {string} id Stable unique identifier (hash) of the listing.
* @property {string} title Title or headline of the listing.
* @property {string} [address] Optional address/location text.
* @property {string} [price] Optional price text/value.
* @property {string} [url] Link to the listing detail page.
* @property {any} [meta] Provider-specific additional metadata.
*/

/**
* @typedef {Object} SimilarityCache
* @property {(title:string, address?:string)=>boolean} hasSimilarEntries Returns true if a similar entry is known.
* @property {(title:string, address?:string)=>void} addCacheEntry Adds a new entry to the similarity cache.
*/
/** @import { ParsedListing } from './types/listing.js' */
/** @import { Job } from './types/job.js' */
/** @import { ProviderConfig } from './types/providerConfig.js' */
/** @import { SpecFilter, SpatialFilter } from './types/filter.js' */
/** @import { SimilarityCache } from './types/similarityCache.js' */
/** @import { Browser } from './types/browser.js' */

/**
* Runtime orchestrator for fetching, normalizing, filtering, deduplicating, storing,
Expand All @@ -48,42 +40,43 @@ import booleanPointInPolygon from '@turf/boolean-point-in-polygon';
* 5) Identify new listings (vs. previously stored hashes)
* 6) Persist new listings
* 7) Filter out entries similar to already seen ones
* 8) Dispatch notifications
* 8) Filter out entries that do not match the job's specFilter
* 9) Filter out entries that do not match the job's spatialFilter
* 10) Dispatch notifications
*/
class FredyPipelineExecutioner {
/**
* Create a new runtime instance for a single provider/job execution.
*
* @param {Object} providerConfig Provider configuration.
* @param {string} providerConfig.url Base URL to crawl.
* @param {string} [providerConfig.sortByDateParam] Query parameter used to enforce sorting by date (provider-specific).
* @param {string} [providerConfig.waitForSelector] CSS selector to wait for before parsing content.
* @param {Object.<string, string>} providerConfig.crawlFields Mapping of field names to selectors/paths to extract.
* @param {string} providerConfig.crawlContainer CSS selector for the container holding listing items.
* @param {(raw:any)=>Listing} providerConfig.normalize Function to convert raw scraped data into a Listing shape.
* @param {(listing:Listing)=>boolean} providerConfig.filter Function to filter out unwanted listings.
* @param {(url:string, waitForSelector?:string)=>Promise<void>|Promise<Listing[]>} [providerConfig.getListings] Optional override to fetch listings.
* @param {Object} notificationConfig Notification configuration passed to notification adapters.
* @param {Object} spatialFilter Optional spatial filter configuration.
* @param {ProviderConfig} providerConfig Provider configuration.
* @param {Job} job Job configuration.
* @param {string} providerId The ID of the provider currently in use.
* @param {string} jobKey Key of the job that is currently running (from within the config).
* @param {SimilarityCache} similarityCache Cache instance for checking similar entries.
* @param browser
* @param {Browser} browser Puppeteer browser instance.
*/
constructor(providerConfig, notificationConfig, spatialFilter, providerId, jobKey, similarityCache, browser) {
constructor(providerConfig, job, providerId, similarityCache, browser) {
/** @type {ProviderConfig} */
this._providerConfig = providerConfig;
this._notificationConfig = notificationConfig;
this._spatialFilter = spatialFilter;
/** @type {Object} */
this._jobNotificationConfig = job.notificationAdapter;
/** @type {string} */
this._jobKey = job.id;
/** @type {SpecFilter | null} */
this._jobSpecFilter = job.specFilter;
/** @type {SpatialFilter | null} */
this._jobSpatialFilter = job.spatialFilter;
/** @type {string} */
this._providerId = providerId;
this._jobKey = jobKey;
/** @type {SimilarityCache} */
this._similarityCache = similarityCache;
/** @type {Browser} */
this._browser = browser;
}

/**
* Execute the end-to-end pipeline for a single provider run.
*
* @returns {Promise<Listing[]|void>} Resolves to the list of new (and similarity-filtered) listings
* @returns {Promise<ParsedListing[]|void>} Resolves to the list of new (and similarity-filtered) listings
* after notifications have been sent; resolves to void when there are no new listings.
*/
execute() {
Expand All @@ -95,7 +88,8 @@ class FredyPipelineExecutioner {
.then(this._geocode.bind(this))
.then(this._save.bind(this))
.then(this._calculateDistance.bind(this))
.then(this._filterBySimilarListings.bind(this))
.then(this._deleteSimilarListings.bind(this))
.then(this._filterBySpecs.bind(this))
.then(this._filterByArea.bind(this))
.then(this._notify.bind(this))
.catch(this._handleError.bind(this));
Expand All @@ -104,8 +98,8 @@ class FredyPipelineExecutioner {
/**
* Geocode new listings.
*
* @param {Listing[]} newListings New listings to geocode.
* @returns {Promise<Listing[]>} Resolves with the listings (potentially with added coordinates).
* @param {ParsedListing[]} newListings New listings to geocode.
* @returns {Promise<ParsedListing[]>} Resolves with the listings (potentially with added coordinates).
*/
async _geocode(newListings) {
for (const listing of newListings) {
Expand All @@ -124,20 +118,19 @@ class FredyPipelineExecutioner {
* Filter listings by area using the job's spatial filter if available.
* Only filters if the spatial filter contains polygon features AND the listing has coordinates.
*
* @param {Listing[]} newListings New listings to filter by area.
* @returns {Promise<Listing[]>} Resolves with listings that are within the area (or not filtered if no area is set).
* @param {ParsedListing[]} newListings New listings to filter by area.
* @returns {ParsedListing[]} Resolves with listings that are within the area (or not filtered if no area is set).
*/
_filterByArea(newListings) {
const polygonFeatures = this._spatialFilter?.features?.filter((f) => f.geometry?.type === 'Polygon');
const polygonFeatures = this._jobSpatialFilter?.features?.filter((f) => f.geometry?.type === 'Polygon');

// If no area filter is set, return all listings
if (!polygonFeatures?.length) {
return newListings;
}

const filteredIds = [];
// Filter listings by area - keep only those within the polygon
const keptListings = newListings.filter((listing) => {
const filteredListings = newListings.filter((listing) => {
// If listing doesn't have coordinates, keep it (don't filter out)
if (listing.latitude == null || listing.longitude == null) {
return true;
Expand All @@ -147,26 +140,42 @@ class FredyPipelineExecutioner {
const point = [listing.longitude, listing.latitude]; // GeoJSON format: [lon, lat]
const isInPolygon = polygonFeatures.some((feature) => booleanPointInPolygon(point, feature));

if (!isInPolygon) {
filteredIds.push(listing.id);
}

return isInPolygon;
});

if (filteredIds.length > 0) {
deleteListingsById(filteredIds);
return filteredListings;
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did you remove the deleteListings call here? It was added on purpose to avoid scraping things infinitely.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This might have happened during the merge. I didn't realize that you had made changes here.
I will fix it.

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks ❤️

}

/**
* Filter listings based on its specifications (minRooms, minSize, maxPrice).
*
* @param {ParsedListing[]} newListings New listings to filter.
* @returns {ParsedListing[]} Resolves with listings that pass the specification filters.
*/
_filterBySpecs(newListings) {
const { minRooms, minSize, maxPrice } = this._jobSpecFilter || {};

// If no specs are set, return all listings
if (!minRooms && !minSize && !maxPrice) {
return newListings;
}

return keptListings;
const filtered = newListings.filter((listing) => {
if (minRooms && listing.rooms && listing.rooms < minRooms) return false;
if (minSize && listing.size && listing.size < minSize) return false;
if (maxPrice && listing.price && listing.price > maxPrice) return false;
return true;
});

return filtered;
}

/**
* Fetch listings from the provider, using the default Extractor flow unless
* a provider-specific getListings override is supplied.
*
* @param {string} url The provider URL to fetch from.
* @returns {Promise<Listing[]>} Resolves with an array of listings (empty when none found).
* @returns {Promise<ParsedListing[]>} Resolves with an array of listings (empty when none found).
*/
_getListings(url) {
const extractor = new Extractor({ ...this._providerConfig.puppeteerOptions, browser: this._browser });
Expand All @@ -189,33 +198,42 @@ class FredyPipelineExecutioner {
}

/**
* Normalize raw listings into the provider-specific Listing shape.
* Normalize raw listings into the provider-specific ParsedListing shape.
*
* @param {any[]} listings Raw listing entries from the extractor or override.
* @returns {Listing[]} Normalized listings.
* @returns {ParsedListing[]} Normalized listings.
*/
_normalize(listings) {
return listings.map(this._providerConfig.normalize);
return listings.map((listing) => this._providerConfig.normalize(listing));
}

/**
* Filter out listings that are missing required fields and those rejected by the
* provider's blacklist/filter function.
*
* @param {ParsedListing[]} listings Listings to filter.
* @returns {ParsedListing[]} Filtered listings that pass validation and provider filter.
*/
_filter(listings) {
const keys = Object.keys(this._providerConfig.crawlFields);
const filteredListings = listings.filter((item) => keys.every((key) => key in item));
return filteredListings.filter(this._providerConfig.filter);
// Keys that every listing must expose (checked with `in`, so the value itself may still be null).
const requiredKeys = this._providerConfig.fieldNames;
// Keys whose values must additionally be non-null/non-undefined.
const requireValues = ['id', 'link', 'title'];

const filteredListings = listings
// Should never drop a listing in practice, because normalize is expected to always produce every field.
.filter((item) => requiredKeys.every((key) => key in item))
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

like you told me i add it, without knowledge for what.

// TODO: move the blacklist filter into this file so it is applied the same way for all providers.
// NOTE(review): the callback is passed unbound, so it must not rely on `this` - confirm for all providers.
.filter(this._providerConfig.filter)
// Drop listings whose id, link or title is null or undefined.
.filter((item) => requireValues.every((key) => item[key] != null));

return filteredListings;
}

/**
* Determine which listings are new by comparing their IDs against stored hashes.
*
* @param {Listing[]} listings Listings to evaluate for novelty.
* @returns {Listing[]} New listings not seen before.
* @param {ParsedListing[]} listings Listings to evaluate for novelty.
* @returns {ParsedListing[]} New listings not seen before.
* @throws {NoNewListingsWarning} When no new listings are found.
*/
_findNew(listings) {
Expand All @@ -232,23 +250,30 @@ class FredyPipelineExecutioner {
/**
* Send notifications for new listings using the configured notification adapter(s).
*
* The adapters receive copies produced by formatListing; the returned promise
* resolves with the original listings that were passed in.
*
* @param {ParsedListing[]} newListings New listings to notify about.
* @returns {Promise<ParsedListing[]>} Resolves to the provided listings after notifications complete.
* @throws {NoNewListingsWarning} When there are no listings to notify about.
*/
_notify(newListings) {
if (newListings.length === 0) {
throw new NoNewListingsWarning();
}
const sendNotifications = notify.send(this._providerId, newListings, this._notificationConfig, this._jobKey);
// TODO: move this formatting into the notification adapter so it is handled the same way for all providers.
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you resolve this todo before we proceed?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i dont did it because it means changes on all notification files. Also this feels not correct. i'm not shure where to put it. Maybei can put it into the notify fnc?

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok I get it. leave it as it is for now :)

const formattedListings = newListings.map(formatListing);
const sendNotifications = notify.send(
this._providerId,
formattedListings,
this._jobNotificationConfig,
this._jobKey,
);
// Wait for all sends, then hand the unformatted listings to the caller.
return Promise.all(sendNotifications).then(() => newListings);
}

/**
* Persist new listings and pass them through.
*
* @param {Listing[]} newListings Listings to store.
* @returns {Listing[]} The same listings, unchanged.
* @param {ParsedListing[]} newListings Listings to store.
* @returns {ParsedListing[]} The same listings, unchanged.
*/
_save(newListings) {
logger.debug(`Storing ${newListings.length} new listings (Provider: '${this._providerId}')`);
Expand All @@ -259,8 +284,8 @@ class FredyPipelineExecutioner {
/**
* Calculate distance for new listings.
*
* @param {Listing[]} listings
* @returns {Listing[]}
* @param {ParsedListing[]} listings
* @returns {ParsedListing[]}
* @private
*/
_calculateDistance(listings) {
Expand Down Expand Up @@ -296,10 +321,10 @@ class FredyPipelineExecutioner {
* Remove listings that are similar to already known entries according to the similarity cache.
* Adds the remaining listings to the cache.
*
* @param {Listing[]} listings Listings to filter by similarity.
* @returns {Listing[]} Listings considered unique enough to keep.
* @param {ParsedListing[]} listings Listings to filter by similarity.
* @returns {ParsedListing[]} Listings considered unique enough to keep.
*/
_filterBySimilarListings(listings) {
_deleteSimilarListings(listings) {
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will then also rename this back. Before your changes, all the filters looked like filters and this one did not, so I renamed it. Now every filter works with delete, so I will rename it back.

const filteredIds = [];
const keptListings = listings.filter((listing) => {
const similar = this._similarityCache.checkAndAddEntry({
Expand Down
2 changes: 2 additions & 0 deletions lib/api/routes/jobRouter.js
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ jobRouter.post('/', async (req, res) => {
enabled,
shareWithUsers = [],
spatialFilter = null,
specFilter = null,
} = req.body;
const settings = await getSettings();
try {
Expand All @@ -197,6 +198,7 @@ jobRouter.post('/', async (req, res) => {
notificationAdapter,
shareWithUsers,
spatialFilter,
specFilter,
});
} catch (error) {
res.send(new Error(error));
Expand Down
3 changes: 3 additions & 0 deletions lib/api/routes/listingsRouter.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ listingsRouter.get('/table', async (req, res) => {
sortfield = null,
sortdir = 'asc',
freeTextFilter,
filterByJobSettings,
} = req.query || {};

// normalize booleans (accept true, 'true', 1, '1' for true; false, 'false', 0, '0' for false)
Expand All @@ -37,6 +38,7 @@ listingsRouter.get('/table', async (req, res) => {
};
const normalizedActivity = toBool(activityFilter);
const normalizedWatch = toBool(watchListFilter);
const normalizedFilterByJobSettings = toBool(filterByJobSettings) ?? true;

let jobFilter = null;
let jobIdFilter = null;
Expand All @@ -56,6 +58,7 @@ listingsRouter.get('/table', async (req, res) => {
jobIdFilter: jobIdFilter,
providerFilter,
watchListFilter: normalizedWatch,
filterByJobSettings: normalizedFilterByJobSettings,
sortField: sortfield || null,
sortDir: sortdir === 'desc' ? 'desc' : 'asc',
userId: req.session.currentUser,
Expand Down
Loading
Loading