Skip to content

Commit 3903fba

Browse files
committed
Continue refactoring and changes for 1.2.0
1 parent 930fdf1 commit 3903fba

11 files changed

Lines changed: 279 additions & 243 deletions

File tree

SmartImage.Lib/Engines/Results/SearchResultItem.cs

Lines changed: 7 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -217,7 +217,7 @@ public override async ValueTask<bool> AllocImageAsync(CancellationToken ct = def
217217
}
218218

219219
bool allocImgOk = false;
220-
var allocOk = await AllocAsync(ct);
220+
var allocOk = await AllocSourceAsync(ct);
221221

222222
if (allocOk) {
223223
allocImgOk = await base.AllocImageAsync(ct);
@@ -286,7 +286,7 @@ await Parallel.ForEachAsync(urls, ct, async (s, token) =>
286286
IsScanResult = true,
287287
};
288288
289-
// var sriNew = CloneToChildWithUrl(s);
289+
// var sriNew = MemberwiseCloneWithUrl(s);
290290
291291
var allocImgOk = await sriNew.AllocImageAsync(token);
292292
@@ -321,8 +321,8 @@ public async ValueTask<bool> ScanAsync(CancellationToken ct = default)
321321

322322
var task = ScanAsync(cw.Writer, s =>
323323
{
324-
var obj= CloneWithUrl(s);
325-
return obj;
324+
var clone= PartialCopyCloneWithUrl(s);
325+
return clone;
326326
}, ct);
327327

328328
while (await cw.Reader.WaitToReadAsync(ct)) {
@@ -334,13 +334,11 @@ public async ValueTask<bool> ScanAsync(CancellationToken ct = default)
334334
var ok = await task;
335335

336336
return ok;
337-
338-
// return sriNews;
339337
}
340338

341339
#endregion
342340

343-
public SearchResultItem CloneWithUrl(Url s)
341+
public SearchResultItem PartialCopyCloneWithUrl(Url s)
344342
{
345343
return new SearchResultItem(Root, false)
346344
{
@@ -353,11 +351,12 @@ public SearchResultItem CloneWithUrl(Url s)
353351
Site = Site,
354352
Source = Source,
355353
Time = Time,
354+
Metadata = Metadata,
356355
IsCloned = true,
357356
};
358357
}
359358

360-
public SearchResultItem CloneToChildWithUrl(Url u)
359+
public SearchResultItem MemberwiseCloneWithUrl(Url u)
361360
{
362361
var clone = (MemberwiseClone() as SearchResultItem);
363362
clone.Url = u;

SmartImage.Lib/Engines/Search/TinEyeEngine.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ public override async Task<SearchResult> GetResultAsync(SearchQuery query, Cance
121121
for (int m = 1; m < backlinks.Count; m++) {
122122
var bl = backlinks[m];
123123

124-
var resultItemSister = resultItem.CloneToChildWithUrl(bl.Backlink);
124+
var resultItemSister = resultItem.MemberwiseCloneWithUrl(bl.Backlink);
125125
resultItemSister.Description = bl.ImageName;
126126
resultItemSister.Title = bl.ImageName;
127127
resultItemSister.Time = DateTime.Parse(bl.CrawlDate);

SmartImage.Lib/Images/ImageScanner.cs

Lines changed: 33 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -130,25 +130,22 @@ static ImageScanner()
130130

131131
#endregion
132132

133-
#region Scanning
133+
#region Scanning/parsing
134134

135-
public static readonly string[] UrlSegmentBlacklist = ["thumbs", ".svg", ".ico", "twitter.svg", "pinterest.svg"];
135+
public static readonly string[] UrlSegmentBlacklist = ["thumbs", ".svg", ".ico", "twitter.svg", "pinterest.svg", "favicon"];
136136

137137
public static readonly string[] LegalSchemeWhitelist = ["http", "https"];
138138

139139
internal const char URL_DELIM = '/';
140140

141-
/// <summary>
142-
/// Scans for images within the webpage located at <paramref name="url"/>; if <paramref name="url"/> itself
143-
/// points to binary image data, it is returned.
144-
/// </summary>
145-
public static async Task<bool> ScanForImagesAsync(Url url, ChannelWriter<UniImage> cw, CancellationToken ct = default)
141+
142+
/*public static async Task<bool> ScanForImagesAsync(Url url, ChannelWriter<UniImage> cw, CancellationToken ct = default)
146143
{
147144
string sz = null;
148145
149146
IHtmlDocument doc = null;
150147
151-
/* Immediate search */
148+
/* Immediate search #1#
152149
var uf = await UniImage.TryCreateAsync(url, autoInit: true, autoDisposeOnError: false, ct: ct);
153150
154151
IFlurlResponse res;
@@ -176,7 +173,7 @@ public static async Task<bool> ScanForImagesAsync(Url url, ChannelWriter<UniImag
176173
/*if (!stream.CanRead) {
177174
stream.Dispose();
178175
goto ret;
179-
}*/
176+
}#1#
180177
181178
var dp = new HtmlParser();
182179
@@ -214,7 +211,7 @@ async ValueTask Body(string s, CancellationToken token)
214211
uni?.Dispose();
215212
}
216213
}
217-
}
214+
}*/
218215

219216
public static IEnumerable<string> ParseImageUrlsByRegex(string html, Url url, bool heuristicFilter = true)
220217
{
@@ -239,6 +236,19 @@ public static IEnumerable<string> ParseImageUrlsByRegex(string html, Url url, bo
239236
}
240237
}
241238

239+
imgUrls = imgUrls.Distinct().Where(e =>
240+
{
241+
if (e.StartsWith("url(")) {
242+
return false;
243+
}
244+
245+
return Url.IsValid(e);
246+
});
247+
248+
if (heuristicFilter) {
249+
imgUrls = imgUrls.Where(static u => !UrlSegmentBlacklist.Any(u.Contains));
250+
}
251+
242252
var abs = imgUrls.Select(u =>
243253
{
244254
if (u.StartsWith("http"))
@@ -253,11 +263,7 @@ public static IEnumerable<string> ParseImageUrlsByRegex(string html, Url url, bo
253263

254264
// return baseUrl + URL_DELIM + u;
255265
return Url.Combine(baseUrl, URL_DELIM.ToString(), u);
256-
}).Select(static u => Url.Decode(u, true)).Where(Url.IsValid).Distinct();
257-
258-
if (heuristicFilter) {
259-
abs = abs.Where(static u => !UrlSegmentBlacklist.Any(u.Contains));
260-
}
266+
});
261267

262268
return abs;
263269
}
@@ -278,10 +284,19 @@ public static IEnumerable<string> ParseImageUrlsByDoc(IHtmlDocument doc)
278284

279285
public static async ValueTask<IFlurlResponse> GetResponseAsync(Url value, CancellationToken ct)
280286
{
281-
var req1 = await Client.Request(value)
282-
.GetAsync(cancellationToken: ct);
287+
var request = Client.Request(value);
288+
289+
if (value.ToString().Contains("zerochan")) {
290+
// request = request.WithHeader("User-Agent", R1.Name);
291+
292+
request = new FlurlRequest(value) { };
293+
}
294+
295+
var response = await request
296+
.OnError(act => { act.ExceptionHandled = true; })
297+
.GetAsync(cancellationToken: ct);
283298

284-
return req1;
299+
return response;
285300
}
286301

287302
#endregion

SmartImage.Lib/Images/Uni/UniImage.cs

Lines changed: 27 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,13 @@
1515
using SixLabors.ImageSharp.Formats.Png;
1616
using SixLabors.ImageSharp.PixelFormats;
1717
using System.Diagnostics;
18+
using System.Diagnostics.CodeAnalysis;
1819
using System.Runtime.InteropServices;
1920
using CoenM.ImageHash;
2021
using CommunityToolkit.HighPerformance;
2122
using Kantan.Net.Utilities;
2223
using Microsoft.IO;
24+
using SixLabors.ImageSharp.Memory;
2325
using SmartImage.Lib.Model;
2426
using SmartImage.Lib.Utilities;
2527

@@ -128,7 +130,7 @@ protected set
128130

129131
#endregion
130132

131-
#region
133+
#region
132134

133135
public double? Similarity
134136
{
@@ -142,7 +144,7 @@ public virtual bool CalculateSimilarity(IHashable hashable)
142144
return Similarity.HasValue;
143145
}
144146

145-
#endregion
147+
#endregion
146148

147149
#region
148150

@@ -172,9 +174,6 @@ public Stream GetStream()
172174
#endregion
173175

174176

175-
public static readonly UniImage Null = null;
176-
177-
178177
private protected UniImage(string value, UniImageType type)
179178
{
180179
Value = value;
@@ -184,10 +183,13 @@ private protected UniImage(string value, UniImageType type)
184183

185184
#region
186185

187-
protected abstract ValueTask<bool> AllocAsync(CancellationToken ct = default);
186+
/// <summary>
187+
/// Allocates <see cref="Bytes"/>
188+
/// </summary>
189+
protected abstract ValueTask<bool> AllocSourceAsync(CancellationToken ct = default);
188190

189191
/// <summary>
190-
/// Allocates <see cref="Image"/>
192+
/// Allocates <see cref="Image"/> from <see cref="Bytes"/>
191193
/// </summary>
192194
public virtual async ValueTask<bool> AllocImageAsync(CancellationToken ct = default)
193195
{
@@ -211,13 +213,26 @@ public virtual async ValueTask<bool> AllocImageAsync(CancellationToken ct = defa
211213

212214
}
213215

216+
/// <returns><see cref="AllocSourceAsync"/>, <see cref="AllocImageAsync"/></returns>
217+
protected virtual async ValueTask<(bool AllocOk, bool AllocImageOk)> AllocAll(CancellationToken ct)
218+
{
219+
bool allocOk = await AllocSourceAsync(ct);
220+
bool allocImgOk = false;
221+
222+
if (allocOk) {
223+
allocImgOk = await AllocImageAsync(ct);
224+
}
225+
226+
return (allocOk, allocImgOk);
227+
}
228+
214229
/// <summary>
215230
/// Attempts to create the appropriate <see cref="UniImage" /> for <paramref name="o" />.
216231
/// </summary>
217232
public static async Task<UniImage> TryCreateAsync(object o, bool autoInit = true, bool autoDisposeOnError = true,
218233
CancellationToken ct = default)
219234
{
220-
UniImage ui = Null;
235+
UniImage ui = null;
221236

222237
try {
223238

@@ -235,11 +250,7 @@ public static async Task<UniImage> TryCreateAsync(object o, bool autoInit = true
235250
bool allocOk = false;
236251
bool allocImgOk = false;
237252

238-
allocOk = await ui.AllocAsync(ct);
239-
240-
if (allocOk) {
241-
allocImgOk = await ui.AllocImageAsync(ct);
242-
}
253+
(allocOk,allocImgOk) = await ui.AllocAll(ct);
243254

244255
s_logger.LogTrace("{Value} :: {AllocOk} {AllocImgOk}", o, allocOk, allocImgOk);
245256

@@ -270,7 +281,7 @@ public static bool IsValidSourceType(object o)
270281

271282
#endregion
272283

273-
#region New region
284+
#region New region
274285

275286
public bool TryWriteOrGetFile(string fn = null)
276287
{
@@ -314,13 +325,14 @@ public virtual string WriteImageToFile([CBN] string fn = null)
314325
return path;
315326
}
316327

317-
#endregion
328+
#endregion
318329

319330

320331
public virtual void Dispose()
321332
{
322333
GC.SuppressFinalize(this);
323334
Image?.Dispose();
335+
s_logger.LogTrace("Disposing {Uv} {Ut}", Value, Type);
324336
}
325337

326338
public override string ToString()

SmartImage.Lib/Images/Uni/UniImageFile.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ internal UniImageFile(FileInfo fi) : base(fi.FullName, UniImageType.File)
2020

2121
public FileInfo LocalFileInfo { get; }
2222

23-
protected override async ValueTask<bool> AllocAsync(CancellationToken ct = default)
23+
protected override async ValueTask<bool> AllocSourceAsync(CancellationToken ct = default)
2424
{
2525
if (HasBytes) {
2626
goto ret;

0 commit comments

Comments
 (0)