using AngleSharp.Html.Dom;
using AngleSharp.Html.Parser;
using Microsoft.Azure.CosmosDB.Table;
using Microsoft.Azure.Storage;
using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.Configuration;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using taloyhtio.idp.parser.common.model;

namespace fivaldi
{
    /// <summary>
    /// Console tool that reads an HTML report, extracts flats and the people
    /// listed under each flat, prints them to the console and stores one
    /// <c>Flat</c> entity per flat in the "mdbFlats" table.
    /// </summary>
    class Program
    {
        // Two or three digits, nothing else. Not referenced in this class.
        private static readonly Regex numcode = new Regex("^\\d{2,3}$", RegexOptions.Compiled);

        // Text shaped like "dd.dd.dddd"; stored as a person's BirthDate when matched.
        private static readonly Regex date = new Regex("^\\d{2}\\.\\d{2}\\.\\d{4}$", RegexOptions.Compiled);

        // One upper-case letter, one whitespace character, one to three digits (e.g. "A 12").
        private static readonly Regex flat = new Regex("^[A-Z]\\s\\d{1,3}$", RegexOptions.Compiled);

        // Any run of digits anywhere in the text.
        private static readonly Regex containsNumber = new Regex("\\d+", RegexOptions.Compiled);

        // Text ending in "Osakesarja". Not referenced in this class.
        private static readonly Regex series = new Regex(".+Osakesarja$", RegexOptions.Compiled);

        // Not referenced in this class.
        private const string MARKER = "Muut tilat";

        // Name used to pick the condo mapping the parsed flats are stored under.
        private const string CONDONAME = "hiihtomaentie16";

        private static IHtmlDocument document;
        private static CloudTableClient tableClient;
        private static List<FlatData> flats;
        private static CondoMapping condo;

        /// <summary>
        /// Entry point. Parses the HTML file named by the first argument, looks up
        /// the condo mapping for <see cref="CONDONAME"/> and saves the parsed flats.
        /// </summary>
        /// <param name="args">
        /// Command line arguments; <c>args[0]</c> is the HTML file name, relative to
        /// the current directory (an absolute path is accepted as well).
        /// </param>
        static void Main(string[] args)
        {
            // Without this guard a missing argument surfaced as IndexOutOfRangeException.
            if (args == null || args.Length == 0 || string.IsNullOrWhiteSpace(args[0]))
            {
                Console.Error.WriteLine("Usage: pass the name of the HTML file to parse as the first argument.");
                Environment.ExitCode = 1;
                return;
            }

            CollectFlatData(args[0]);
            InitClient();

            // FirstOrDefault instead of [0]: indexing an empty result throws before
            // the null check below could ever run.
            condo = GetCondo(CONDONAME).FirstOrDefault();
            if (condo == null)
            {
                Console.Error.WriteLine($"No condo mapping found for '{CONDONAME}'.");
                Environment.ExitCode = 1;
                return;
            }

            SaveFlats();
        }

        /// <summary>
        /// Creates the table client from the connection string stored under
        /// <c>Constants.KEY_AZURE_STORAGE</c> and assigns it to <see cref="tableClient"/>.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// The connection string entry is missing from the configuration.
        /// </exception>
        private static void InitClient()
        {
            var settings = ConfigurationManager.ConnectionStrings[Constants.KEY_AZURE_STORAGE];
            if (settings == null)
            {
                // The indexer returns null for an unknown name; fail with a readable message.
                throw new InvalidOperationException(
                    $"Connection string '{Constants.KEY_AZURE_STORAGE}' is not configured.");
            }

            var storageAccount = CloudStorageAccount.Parse(settings.ConnectionString);
            tableClient = storageAccount.CreateCloudTableClient(
                new TableConnectionPolicy()
                {
                    UseDirectMode = false
                });
        }

        /// <summary>
        /// Loads every row of the "mdbCondoMappings" table (creating the table when
        /// it does not exist) and optionally filters the result.
        /// </summary>
        /// <param name="condoName">
        /// When not null, only mappings whose <c>PMSCondoName</c> occurs inside this
        /// string are returned. When null, all mappings are returned.
        /// </param>
        /// <returns>The matching mappings; empty when nothing matches.</returns>
        private static List<CondoMapping> GetCondo(string condoName)
        {
            var table = tableClient.GetTableReference("mdbCondoMappings");
            table.CreateIfNotExists();

            var allCondos = table.CreateQuery<CondoMapping>().ToList();
            if (condoName == null)
            {
                return allCondos;
            }

            // string.Contains(null) throws, so rows without a PMS name are skipped.
            return allCondos
                .Where(x => x.PMSCondoName != null && condoName.Contains(x.PMSCondoName))
                .ToList();
        }

        /// <summary>
        /// Writes one <c>Flat</c> entity per parsed flat into the "mdbFlats" table
        /// (creating the table when it does not exist) and prints the HTTP status
        /// code of every write. Each entity receives a freshly generated id.
        /// </summary>
        static void SaveFlats()
        {
            var table = tableClient.GetTableReference("mdbFlats");
            table.CreateIfNotExists();

            foreach (var flatData in flats)
            {
                var id = Guid.NewGuid();
                var users = flatData.People.ConvertAll(p => new FlatUser
                {
                    DisplayName = p.Name,
                    PIN = Util.ConvertPIN(p.BirthDate)
                });

                var entity = new Flat
                {
                    PartitionKey = condo.TaloyhtioPMCId.ToString(),
                    Id = id,
                    RowKey = id.ToString(),
                    CondoPMS = condo.PMSCondoName,
                    FlatTitle = flatData.Title,
                    PMCTaloyhtioId = condo.TaloyhtioPMCId,
                    FlatUsers = JsonConvert.SerializeObject(users)
                };

                var result = table.Execute(TableOperation.InsertOrReplace(entity));
                Console.WriteLine(result.HttpStatusCode);
            }
        }

        /// <summary>
        /// Reads and parses the HTML file, fills <see cref="flats"/> with every flat
        /// found in the table rows together with the people listed under it, and
        /// prints the result to the console.
        /// </summary>
        /// <param name="source">
        /// File name relative to the current directory; an absolute path is used as is.
        /// </param>
        static void CollectFlatData(string source)
        {
            // Path.Combine picks the right separator and keeps an absolute path intact.
            string dataUrl = Path.Combine(Directory.GetCurrentDirectory(), source);

            // The file is decoded as Windows-1252 (code page 1252).
            string sourceDoc = File.ReadAllText(dataUrl, Encoding.GetEncoding(1252));
            document = new HtmlParser().ParseDocument(sourceDoc);

            // Text of every <td> of every <tr>, in document order. Queried once and
            // shared by both passes below.
            List<List<string>> rows = document.QuerySelectorAll("tr")
                .Select(tr => tr.QuerySelectorAll("td").Select(td => td.TextContent).ToList())
                .ToList();

            flats = FindFlatRows(rows);

            for (int i = 0; i < flats.Count; i++)
            {
                // A flat owns the rows from its own row up to, but not including,
                // the next flat's row; the last flat owns everything to the end.
                int endRow = i == flats.Count - 1 ? rows.Count : flats[i + 1].Row;
                CollectPeople(flats[i], rows, endRow);
            }

            PrintFlats();
            //Console.ReadLine();
        }

        /// <summary>
        /// Finds the rows whose second cell matches the <see cref="flat"/> pattern.
        /// </summary>
        /// <param name="rows">Cell texts per table row.</param>
        /// <returns>One <c>FlatData</c> per match, carrying the cell text and row index.</returns>
        private static List<FlatData> FindFlatRows(List<List<string>> rows)
        {
            var found = new List<FlatData>();
            for (int i = 0; i < rows.Count; i++)
            {
                var cells = rows[i];
                if (cells.Count < 2)
                {
                    continue;
                }

                var title = cells[1];
                if (string.IsNullOrWhiteSpace(title) || !flat.IsMatch(title))
                {
                    continue;
                }

                found.Add(new FlatData { Title = title, Row = i });
            }

            return found;
        }

        /// <summary>
        /// Adds the people found in rows <c>[flatData.Row, endRow)</c> to
        /// <paramref name="flatData"/>.
        /// </summary>
        /// <param name="flatData">Flat whose <c>People</c> list is filled.</param>
        /// <param name="rows">Cell texts per table row.</param>
        /// <param name="endRow">Index of the first row that no longer belongs to the flat.</param>
        private static void CollectPeople(FlatData flatData, List<List<string>> rows, int endRow)
        {
            for (int row = flatData.Row; row < endRow; row++)
            {
                // Only the non-blank cells take part in the positional checks below.
                var cells = rows[row].Where(text => !string.IsNullOrWhiteSpace(text)).ToList();

                if (row == flatData.Row)
                {
                    // On the flat's own row the 7th non-blank cell is taken as a name,
                    // unless it contains "100,00" (presumably a percentage/amount
                    // column rather than a name - TODO confirm against the report layout).
                    if (cells.Count >= 7 && !cells[6].Contains("100,00"))
                    {
                        flatData.People.Add(new Person { Name = cells[6] });
                    }

                    continue;
                }

                if (cells.Count <= 1)
                {
                    continue;
                }

                if (date.IsMatch(cells[0]))
                {
                    // A date belongs to the most recently added person. A date row
                    // that shows up before any person is ignored instead of throwing.
                    var lastPerson = flatData.People.LastOrDefault();
                    if (lastPerson != null)
                    {
                        lastPerson.BirthDate = cells[0];
                    }
                }
                else if (!containsNumber.IsMatch(cells[0]) && cells.Count < 4)
                {
                    // A digit-free first cell on a row with 2-3 non-blank cells is
                    // taken as a further person's name.
                    flatData.People.Add(new Person { Name = cells[0] });
                }
            }
        }

        /// <summary>
        /// Prints every flat title followed by its people (name and birth date).
        /// </summary>
        private static void PrintFlats()
        {
            foreach (var flatData in flats)
            {
                Console.WriteLine(flatData.Title);
                foreach (var person in flatData.People)
                {
                    Console.WriteLine($"\t{person.Name} : {person.BirthDate}");
                }

                Console.WriteLine(" ");
            }
        }
    }
}