Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion PatreonDownloader.App/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,9 @@ private static async Task RunPatreonDownloader(CommandLineOptions commandLineOpt
return;
}

PatreonDownloaderSettings settings = await InitializeSettings(commandLineOptions);
PatreonCrawledUrlFilter filter = PatreonCrawledUrlFilter.GetInstance(settings);

_universalDownloader = new UniversalDownloader(new PatreonDownloaderModule());

_filesDownloaded = 0;
Expand All @@ -124,8 +127,8 @@ private static async Task RunPatreonDownloader(CommandLineOptions commandLineOpt
_universalDownloader.CrawlerMessage += UniversalDownloaderOnCrawlerMessage;
_universalDownloader.FileDownloaded += UniversalDownloaderOnFileDownloaded;

PatreonDownloaderSettings settings = await InitializeSettings(commandLineOptions);
await _universalDownloader.Download(commandLineOptions.Url, settings);
filter.SaveIgnorePostsToJson();

_universalDownloader.StatusChanged -= UniversalDownloaderOnStatusChanged;
_universalDownloader.PostCrawlStart -= UniversalDownloaderOnPostCrawlStart;
Expand Down
10 changes: 10 additions & 0 deletions PatreonDownloader.Implementation/Models/JSONObjects/IgnorePosts.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
using Newtonsoft.Json;

namespace PatreonDownloader.Implementation.Models.JSONObjects.IgnorePosts;

/// <summary>
/// JSON model for a single entry of the ignore list: a post that should be skipped.
/// </summary>
public class IgnorePost
{
    /// <summary>
    /// Identifier of the post; read from and written to the <c>id</c> JSON property.
    /// </summary>
    [JsonProperty("id")]
    public string Id { get; set; }
}
2 changes: 1 addition & 1 deletion PatreonDownloader.Implementation/PatreonCookieValidator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ public async Task ValidateCookies(CookieContainer cookieContainer)
if (cookieContainer == null)
throw new ArgumentNullException(nameof(cookieContainer));

CookieCollection cookies = cookieContainer.GetCookies(new Uri("https://patreon.com"));
CookieCollection cookies = cookieContainer.GetAllCookies();

if (cookies["__cf_bm"] == null)
throw new CookieValidationException("__cf_bm cookie not found");
Expand Down
64 changes: 64 additions & 0 deletions PatreonDownloader.Implementation/PatreonCrawledUrlFilter.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using Newtonsoft.Json;
using PatreonDownloader.Implementation.Models.JSONObjects.IgnorePosts;
using UniversalDownloaderPlatform.Common.Interfaces.Models;

namespace PatreonDownloader.Implementation;

/// <summary>
/// Keeps the list of posts that must be skipped by the crawler and persists that list
/// as JSON (<c>ignorePosts.json</c>) inside the configured download directory.
/// </summary>
/// <remarks>
/// The class is a lazily created singleton: the first call to
/// <see cref="GetInstance(IUniversalDownloaderPlatformSettings)"/> creates the instance and
/// loads the file; later calls return that same instance and ignore their argument.
/// NOTE(review): no synchronization is performed here - confirm whether
/// <see cref="AddIgnorePost"/> can be invoked from concurrently running downloads.
/// </remarks>
public class PatreonCrawledUrlFilter
{
    private const string IgnorePostsFileName = "ignorePosts.json";

    private static PatreonCrawledUrlFilter _instance;

    private readonly IUniversalDownloaderPlatformSettings _settings;

    // Entries in insertion order; this is what gets serialized back to disk.
    private readonly List<IgnorePost> _ignorePosts;

    // Ids of the entries in _ignorePosts. Gives constant-time lookups while filtering and
    // prevents the same post from being stored more than once. The default string comparer
    // is ordinal, which matches the '==' comparison used previously.
    private readonly HashSet<string> _ignoredPostIds;

    private PatreonCrawledUrlFilter(IUniversalDownloaderPlatformSettings settings)
    {
        _settings = settings ?? throw new ArgumentNullException(nameof(settings));
        _ignorePosts = new List<IgnorePost>();
        _ignoredPostIds = new HashSet<string>();

        foreach (IgnorePost storedPost in GetIgnorePostsFromJson())
        {
            Register(storedPost);
        }
    }

    /// <summary>
    /// Returns the already initialized instance.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The instance has not been created yet via
    /// <see cref="GetInstance(IUniversalDownloaderPlatformSettings)"/>.
    /// </exception>
    public static PatreonCrawledUrlFilter GetInstance()
    {
        if (_instance == null)
        {
            throw new InvalidOperationException("Instance not initialized");
        }

        return _instance;
    }

    /// <summary>
    /// Returns the shared instance, creating it from <paramref name="settings"/> on first use.
    /// </summary>
    /// <param name="settings">
    /// Settings providing the download directory. Only used when the instance does not exist yet.
    /// </param>
    /// <exception cref="ArgumentNullException">
    /// The instance has to be created and <paramref name="settings"/> is null.
    /// </exception>
    public static PatreonCrawledUrlFilter GetInstance(IUniversalDownloaderPlatformSettings settings)
    {
        return _instance ??= new PatreonCrawledUrlFilter(settings);
    }

    // Builds the location of the ignore list inside the download directory.
    private string GetIgnorePostsFilePath()
    {
        return Path.Combine(_settings.DownloadDirectory, IgnorePostsFileName);
    }

    // Loads the stored ignore list; a missing, empty or "null" file yields an empty list.
    private List<IgnorePost> GetIgnorePostsFromJson()
    {
        string filePath = GetIgnorePostsFilePath();
        if (!File.Exists(filePath))
        {
            return new List<IgnorePost>();
        }

        string json = File.ReadAllText(filePath);

        // DeserializeObject returns null for empty or literal "null" content.
        return JsonConvert.DeserializeObject<List<IgnorePost>>(json) ?? new List<IgnorePost>();
    }

    // Stores the entry unless it is null or its id is already known.
    private void Register(IgnorePost ignorePost)
    {
        if (ignorePost == null)
        {
            return;
        }

        if (_ignoredPostIds.Add(ignorePost.Id))
        {
            _ignorePosts.Add(ignorePost);
        }
    }

    /// <summary>
    /// Writes the current ignore list to <c>ignorePosts.json</c> in the download directory,
    /// replacing any existing file.
    /// </summary>
    public void SaveIgnorePostsToJson()
    {
        string json = JsonConvert.SerializeObject(_ignorePosts);
        File.WriteAllText(GetIgnorePostsFilePath(), json);
    }

    /// <summary>
    /// Removes, in place, every crawled url whose post id is on the ignore list.
    /// </summary>
    /// <param name="crawledUrls">List to filter; it is modified directly.</param>
    /// <exception cref="ArgumentNullException"><paramref name="crawledUrls"/> is null.</exception>
    public void FilterOutPages(List<PatreonCrawledUrl> crawledUrls)
    {
        if (crawledUrls == null)
        {
            throw new ArgumentNullException(nameof(crawledUrls));
        }

        crawledUrls.RemoveAll(crawledUrl => _ignoredPostIds.Contains(crawledUrl.PostId));
    }

    /// <summary>
    /// Adds a post to the ignore list. A post whose id is already listed is not added again.
    /// </summary>
    /// <param name="ignorePost">Entry to add.</param>
    /// <exception cref="ArgumentNullException"><paramref name="ignorePost"/> is null.</exception>
    public void AddIgnorePost(IgnorePost ignorePost)
    {
        if (ignorePost == null)
        {
            throw new ArgumentNullException(nameof(ignorePost));
        }

        Register(ignorePost);
    }
}
9 changes: 9 additions & 0 deletions PatreonDownloader.Implementation/PatreonDefaultPlugin.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
using PatreonDownloader.Implementation;
using PatreonDownloader.Implementation.Enums;
using PatreonDownloader.Implementation.Interfaces;
using PatreonDownloader.Implementation.Models.JSONObjects.IgnorePosts;
using UniversalDownloaderPlatform.Common.Exceptions;
using UniversalDownloaderPlatform.Common.Interfaces;
using UniversalDownloaderPlatform.Common.Interfaces.Models;
Expand All @@ -24,6 +25,7 @@ namespace PatreonDownloader.Engine
internal sealed class PatreonDefaultPlugin : IPlugin
{
private IWebDownloader _webDownloader;
private PatreonCrawledUrlFilter _crawledUrlFilter;

private readonly Logger _logger = LogManager.GetCurrentClassLogger();

Expand All @@ -37,6 +39,7 @@ internal sealed class PatreonDefaultPlugin : IPlugin
/// <summary>
/// Creates the plugin around the given web downloader and picks up the shared
/// crawled-url filter, which must already have been initialized.
/// </summary>
/// <param name="webDownloader">Downloader used to fetch files.</param>
/// <exception cref="ArgumentNullException"><paramref name="webDownloader"/> is null.</exception>
public PatreonDefaultPlugin(IWebDownloader webDownloader)
{
    if (webDownloader == null)
    {
        throw new ArgumentNullException(nameof(webDownloader));
    }

    _webDownloader = webDownloader;

    // The parameterless overload does not create the filter; it throws when none exists yet.
    _crawledUrlFilter = PatreonCrawledUrlFilter.GetInstance();
}

public void OnLoad(IDependencyResolver dependencyResolver)
Expand All @@ -61,6 +64,12 @@ public async Task Download(ICrawledUrl crawledUrl)
throw new DownloadException($"Download path is not filled for {crawledUrl.Url}");

await _webDownloader.DownloadFile(crawledUrl.Url, Path.Combine(_settings.DownloadDirectory, crawledUrl.DownloadPath), null); //referer is set in PatreonWebDownloader

PatreonCrawledUrl crawledUrlPatreon = crawledUrl as PatreonCrawledUrl;
if(crawledUrlPatreon == null) throw new DownloadException("Crawled url is not of type PatreonCrawledUrl");

IgnorePost ignorePost = new IgnorePost {Id = crawledUrlPatreon.PostId};
_crawledUrlFilter.AddIgnorePost(ignorePost);
}

public Task BeforeStart(IUniversalDownloaderPlatformSettings settings)
Expand Down
7 changes: 5 additions & 2 deletions PatreonDownloader.Implementation/PatreonPageCrawler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,11 @@ internal sealed class PatreonPageCrawler : IPageCrawler
private readonly IPluginManager _pluginManager;
private readonly Logger _logger = LogManager.GetCurrentClassLogger();

private PatreonCrawledUrlFilter _patreonCrawledUrlFilter;
private PatreonDownloaderSettings _patreonDownloaderSettings;

public event EventHandler<PostCrawlEventArgs> PostCrawlStart;
public event EventHandler<PostCrawlEventArgs> PostCrawlEnd;
public event EventHandler<PostCrawlEventArgs> PostCrawlEnd;
public event EventHandler<NewCrawledUrlEventArgs> NewCrawledUrl;
public event EventHandler<CrawlerMessageEventArgs> CrawlerMessage;

Expand All @@ -48,7 +49,8 @@ public PatreonPageCrawler(IWebDownloader webDownloader, IPluginManager pluginMan

/// <summary>
/// Stores the Patreon-specific settings and obtains the shared crawled-url filter
/// before crawling begins.
/// </summary>
/// <param name="settings">
/// Platform settings; cast to <see cref="PatreonDownloaderSettings"/>, so any other
/// implementation results in an <see cref="InvalidCastException"/>.
/// </param>
public async Task BeforeStart(IUniversalDownloaderPlatformSettings settings)
{
    // The method stays 'async' so that a failing cast surfaces through the returned task.
    _patreonDownloaderSettings = (PatreonDownloaderSettings)settings;
    _patreonCrawledUrlFilter = PatreonCrawledUrlFilter.GetInstance(settings);
}

public async Task<List<ICrawledUrl>> Crawl(ICrawlTargetInfo crawlTargetInfo)
Expand Down Expand Up @@ -86,6 +88,7 @@ await File.WriteAllTextAsync(Path.Combine(_patreonDownloaderSettings.DownloadDir
json);

ParsingResult result = await ParsePage(json);
_patreonCrawledUrlFilter.FilterOutPages(result.CrawledUrls);

if(result.CrawledUrls.Count > 0)
crawledUrls.AddRange(result.CrawledUrls);
Expand Down