jigdo API
Last update by Admin on 2010-05-23
scan.hh
Go to the documentation of this file.00001 /* $Id: scan.hh,v 1.33 2003/03/03 20:47:17 richard Exp $ -*- C++ -*- 00002 __ _ 00003 |_) /| Copyright (C) 2001-2002 | richard@ 00004 | \/¯| Richard Atterer | atterer.net 00005 ¯ '` ¯ 00006 This program is free software; you can redistribute it and/or modify 00007 it under the terms of the GNU General Public License, version 2. See 00008 the file COPYING for details. 00009 00010 */ 00016 #ifndef SCAN_HH 00017 #define SCAN_HH 00018 00019 #include <config.h> 00020 00021 #include <list> 00022 #include <set> 00023 #include <vector> 00024 #include <time.h> 00025 #include <sys/stat.h> 00026 00027 #include <cachefile.hh> 00028 #include <debug.hh> 00029 #include <md5sum.hh> 00030 #include <recursedir.fh> 00031 #include <rsyncsum.hh> 00032 #include <scan.fh> 00033 #include <string.hh> 00034 //______________________________________________________________________ 00035 00039 class LocationPath { 00041 friend class JigdoCache; 00042 public: 00044 bool operator<(const LocationPath& x) const { return path < x.path; } 00045 const string& getPath() const { return path; } 00046 const string& getLabel() const { return label; } 00047 LocationPath& setLabel(const string& l) { label = l; return *this; } 00048 const string& getUri() const { return uri; } 00049 private: 00050 template <class StringP, class StringL, class StringU> 00051 inline LocationPath(const StringP& p, const StringL& l, 00052 const StringU& u = ""); 00053 // Using default dtor & copy ctor 00054 // The data members are accessed directly by JigdoCache methods 00055 string path; // name of directory 00056 string label; // short label to identify path in location list 00057 string uri; // e.g. URL for directory on FTP server 00058 }; 00059 00060 typedef set<LocationPath> LocationPathSet; 00061 //______________________________________________________________________ 00062 00071 class FilePart { 00073 friend class JigdoCache; 00074 public: 00076 inline const string& getPath() const; 00077 LocationPathSet::iterator getLocation() { return path; } 00080 inline const string& leafName() const; 00081 inline uint64 size() const; 00082 inline time_t mtime() const; 00085 inline const MD5* getSums(JigdoCache* c, size_t blockNr); 00087 inline const MD5Sum* getMD5Sum(JigdoCache* c); 00089 inline const RsyncSum64* getRsyncSum(JigdoCache* c); 00090 00097 inline void markAsDeleted(JigdoCache* c); 00101 bool deleted() const { return fileSize == 0; } 00102 00105 ~FilePart() { } 00106 // default copy ctor / assignment op 00107 //__________ 00108 00109 private: 00110 inline FilePart(LocationPathSet::iterator p, string rest, uint64 fSize, 00111 time_t fMtime); 00112 00113 /* Called when the methods getSums/getMD5Sum() need data read from 00114 file. Might return null. */ 00115 const MD5* getSumsRead(JigdoCache* c, size_t blockNr); 00116 const MD5Sum* getMD5SumRead(JigdoCache* c); 00117 //__________ 00118 00119 /* There are 3 states of a FilePart: 00120 a) sums.empty(): File has not been read from so far 00121 b) !sums.empty() && !mdValid: sums[0] and rsyncSum are valid 00122 c) !sums.empty() && mdValid: all sums[] and rsyncSum and md5Sum valid*/ 00123 00124 LocationPathSet::iterator path; 00125 string pathRest; // further dir names after "path", and leafname of file 00126 uint64 fileSize; 00127 time_t fileMtime; 00128 00129 /* RsyncSum64 of the first MkTemplate::blockLength bytes of the 00130 file. */ 00131 RsyncSum64 rsyncSum; 00132 bool rsyncValid() const { return sums.size() > 0; } 00133 00134 /* File is split up into chunks of length md5BlockLength (the last 00135 one may be smaller) and the MD5 checksum of each is 00136 calculated. */ 00137 vector<MD5> sums; 00138 00139 /* Hash of complete file contents. mdValid is true iff md5Sum is 00140 cached and valid. NB, it is possible that mdValid==true, but 00141 sums.size()==0, after md5BlockSize has been changed. If 00142 sums.size()==fileSize/md5ChunkSize, then not necessarily 00143 mdValid==true */ 00144 MD5Sum md5Sum; 00145 enum Flags { 00146 EMPTY = 0, 00147 // Bit flag is set iff md5Sum contains the whole file's md5 sum 00148 MD_VALID = 1, 00149 /* This file was looked up in the cache file (whether successfully 00150 or not doesn't matter) - don't look it up again. */ 00151 WAS_LOOKED_UP = 2, 00152 // Write this file's info into the cache file during ~JigdoCache() 00153 TO_BE_WRITTEN = 4 00154 }; 00155 Flags flags; 00156 bool getFlag(Flags f) const { return (flags & f) != 0; } 00157 inline void setFlag(Flags f); 00158 inline void clearFlag(Flags f); 00159 bool mdValid() const { return getFlag(MD_VALID); } 00160 00161 # if HAVE_LIBDB 00162 // Offsets for binary representation in database (see cachefile.hh) 00163 enum { 00164 BLOCKLEN = 0, MD5BLOCKLEN = 4, MD5BLOCKS = 8, RSYNCSUM = 12, 00165 FILE_MD5SUM = 20, PART_MD5SUM = 36 00166 }; 00167 00168 size_t unserializeCacheEntry(const byte* data, size_t dataSize, 00169 size_t md5BlockLength); // Byte stream => FilePart 00170 struct SerializeCacheEntry; // FilePart => byte stream 00171 friend struct SerializeCacheEntry; 00172 # endif 00173 }; 00174 //______________________________________________________________________ 00175 00178 struct JigdoCacheError : Error { 00179 explicit JigdoCacheError(const string& m) : Error(m) { } 00180 explicit JigdoCacheError(const char* m) : Error(m) { } 00181 }; 00182 00194 class JigdoCache { 00195 friend class FilePart; 00196 public: 00197 class ProgressReporter; 00199 explicit JigdoCache(const string& cacheFileName, 00200 size_t expiryInSeconds = 60*60*24*30, size_t bufLen = 128*1024, 00201 ProgressReporter& pr = noReport); 00203 ~JigdoCache(); 00204 00208 template <class RecurseDir> 00209 inline void readFilenames(RecurseDir& rd); 00218 void setParams(size_t blockLen,size_t md5BlockLen); 00219 size_t getBlockLen() const { return blockLength; } 00220 size_t getMD5BlockLen() const { return md5BlockLength; } 00221 00224 inline void setReadAmount(size_t bytes); 00229 void deallocBuffer() { buffer.resize(0); } 00230 00232 ProgressReporter* getReporter() { return &reporter; } 00233 00235 inline void setCheckFiles(bool check) { checkFiles = check; } 00236 inline bool getCheckFiles() const { return checkFiles; } 00237 00239 inline size_t size() const { return nrOfFiles; } 00240 00249 template <class StringP, class StringL, class StringU> 00250 const LocationPathSet::iterator addLabel( 00251 const StringP& path, const StringL& label, const StringU& uri = ""); 00252 00254 typedef FilePart value_type; 00255 typedef FilePart& reference; 00256 //____________________ 00257 00259 class iterator { 00260 friend class JigdoCache; 00261 friend class FilePart; 00262 public: 00263 iterator() { } 00264 inline iterator& operator++(); // might throw(RecurseError, bad_alloc) 00265 FilePart& operator*() { return *part; } 00266 FilePart* operator->() { return &*part; } 00267 // Won't compare cache members - their being different is usu. a bug 00268 bool operator==(const iterator& i) const { 00269 Paranoid(cache == i.cache); 00270 return part == i.part; 00271 } 00272 bool operator!=(const iterator& i) const { return !(*this == i); } 00273 // Default dtor 00274 private: 00275 iterator(JigdoCache* c, const list<FilePart>::iterator& p) 00276 : cache(c), part(p) { } 00277 JigdoCache* cache; 00278 list<FilePart>::iterator part; 00279 }; 00280 friend class JigdoCache::iterator; 00282 inline iterator begin(); 00285 iterator end() { return iterator(this, files.end()); } 00286 //____________________ 00287 00288 private: 00289 // Read one filename from recurseDir and (if success) add entry to "files" 00290 void addFile(const string& name); 00292 static ProgressReporter noReport; 00293 00294 size_t blockLength, md5BlockLength; 00295 00296 /* Check if files exist in the filesystem */ 00297 bool checkFiles; 00298 00299 /* List of files in the cache (not vector<> because jigdo-file keeps 00300 ptrs, and if a vector realloc()s, all elements' addresses may 00301 change) */ 00302 list<FilePart> files; 00303 // Equal to files.size() less any files that are deleted() 00304 size_t nrOfFiles; 00305 // Temporarily used during readFilenames() 00306 static struct stat fileInfo; 00307 00308 // Look up LocationPath by directory name 00309 LocationPathSet locationPaths; 00310 00311 size_t readAmount; 00312 vector<byte> buffer; 00313 ProgressReporter& reporter; 00314 00315 # if HAVE_LIBDB 00316 CacheFile* cacheFile; 00317 size_t cacheExpiry; 00318 # endif 00319 }; 00320 //______________________________________________________________________ 00321 00324 class JigdoCache::ProgressReporter { 00325 public: 00326 virtual ~ProgressReporter() { } 00331 virtual void error(const string& message); 00335 virtual void info(const string& message); 00345 virtual void scanningFile(const FilePart* file, uint64 offInFile); 00346 }; 00347 //______________________________________________________________________ 00348 00349 FilePart::FilePart(LocationPathSet::iterator p, string rest, uint64 fSize, 00350 time_t fMtime) 00351 : path(p), pathRest(rest), fileSize(fSize), fileMtime(fMtime), 00352 rsyncSum(), sums(), md5Sum(), flags(EMPTY) { 00353 //pathRest.reserve(0); 00354 } 00355 00356 const string& FilePart::getPath() const { 00357 Paranoid(!deleted()); 00358 return path->getPath(); 00359 } 00360 00361 const string& FilePart::leafName() const { 00362 Paranoid(!deleted()); 00363 return pathRest; 00364 } 00365 00366 uint64 FilePart::size() const { 00367 Paranoid(!deleted()); 00368 return fileSize; 00369 } 00370 00371 time_t FilePart::mtime() const { 00372 Paranoid(!deleted()); 00373 return fileMtime; 00374 } 00375 00376 const MD5* FilePart::getSums(JigdoCache* c, size_t blockNr) { 00377 Paranoid(!deleted()); 00378 if (mdValid() || (blockNr == 0 && !sums.empty())) return &sums[blockNr]; 00379 else return getSumsRead(c, blockNr); 00380 } 00381 00382 const MD5Sum* FilePart::getMD5Sum(JigdoCache* c) { 00383 Paranoid(!deleted()); 00384 if (mdValid()) return &md5Sum; 00385 else return getMD5SumRead(c); 00386 } 00387 00388 const RsyncSum64* FilePart::getRsyncSum(JigdoCache* c) { 00389 Paranoid(!deleted()); 00390 if (!rsyncValid()) { 00391 if (getSumsRead(c, 0) == 0) return 0; 00392 } 00393 Paranoid(rsyncValid()); 00394 return &rsyncSum; 00395 } 00396 00397 void FilePart::markAsDeleted(JigdoCache* c) { 00398 fileSize = 0; sums.resize(0); --(c->nrOfFiles); 00399 } 00400 00401 void FilePart::setFlag(Flags f) { 00402 flags = static_cast<Flags>(static_cast<unsigned>(flags) 00403 | static_cast<unsigned>(f)); 00404 } 00405 00406 void FilePart::clearFlag(Flags f) { 00407 flags = static_cast<Flags>(static_cast<unsigned>(flags) 00408 & ~static_cast<unsigned>(f)); 00409 } 00410 //________________________________________ 00411 00412 void JigdoCache::setReadAmount(size_t bytes) { 00413 if (bytes < 64*1024) bytes = 64*1024; 00414 readAmount = bytes; 00415 } 00416 00417 template <class RecurseDir> 00418 void JigdoCache::readFilenames(RecurseDir& rd) { 00419 string name; 00420 while (true) { 00421 bool status = rd.getName(name, &fileInfo, checkFiles); // Might throw error 00422 if (status == FAILURE) return; // No more names 00423 off_t stSize = fileInfo.st_size; 00424 # if HAVE_LIBDB 00425 if (!checkFiles) { 00426 const byte* data; 00427 size_t dataSize; 00428 try { 00429 if (cacheFile->findName(data, dataSize, name, stSize, 00430 fileInfo.st_mtime).failed()) 00431 continue; 00432 } catch (DbError e) { 00433 string err = subst(_("Error accessing cache: %1"), e.message); 00434 reporter.error(err); 00435 } 00436 } 00437 # endif 00438 if (stSize == 0) continue; // Skip zero-length files 00439 addFile(name); 00440 } 00441 } 00442 00443 JigdoCache::iterator JigdoCache::begin() { 00444 list<FilePart>::iterator i = files.begin(); 00445 while (i != files.end() && i->deleted()) ++i; 00446 return iterator(this, i); 00447 } 00448 00449 JigdoCache::iterator& JigdoCache::iterator::operator++() { 00450 list<FilePart>::iterator end = cache->files.end(); 00451 do ++part; while (part != end && part->deleted()); 00452 return *this; 00453 } 00454 //______________________________________________________________________ 00455 00456 template <class StringP, class StringL, class StringU> 00457 LocationPath::LocationPath(const StringP& p, const StringL& l, 00458 const StringU& u) 00459 : path(p), label(l), uri(u) { 00460 //path.reserve(0); label.reserve(0); uri.reserve(0); 00461 } 00462 //________________________________________ 00463 00464 /* NB: If an entry with the given path already exists, we must not 00465 delete it and create another, because it is referenced from at 00466 least one FilePart. Because of our version of 00467 LocationPath::operator<, only the path matters during the find(). */ 00468 template <class StringP, class StringL, class StringU> 00469 const LocationPathSet::iterator 00470 JigdoCache::addLabel(const StringP& path, const StringL& label, 00471 const StringU& uri) { 00472 LocationPath tmp(path, label, uri); 00473 if (!tmp.path.empty() && tmp.path[tmp.path.length() - 1] != DIRSEP) 00474 tmp.path += DIRSEP; 00475 LocationPathSet::iterator i = locationPaths.find(tmp); 00476 if (i == locationPaths.end()) { 00477 return locationPaths.insert(tmp).first; // Create new entry 00478 } else { 00479 implicit_cast<string>(i->label) = tmp.label; // Overwrite old entry 00480 implicit_cast<string>(i->uri) = tmp.uri; 00481 return i; 00482 } 00483 } 00484 00485 #endif
Generated on Tue Sep 23 14:27:41 2008 for jigdo by
