Fossil

Check-in [93bb3231]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: Enhance the "reconstruct" command so that it sets the correct hash policy for artifacts read from disk.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256: 93bb323192ab7db60478f85777e205e0f3a496fb263b1b863868cd0f343112bb
User & Date: drh 2019-02-05 15:43:01
Context
2019-02-05
15:49
Fix the unified diff output so that it conforms with established conventions. check-in: 7fd2a365 user: drh tags: trunk
15:43
Enhance the "reconstruct" command so that it sets the correct hash policy for artifacts read from disk. check-in: 93bb3231 user: drh tags: trunk
2019-02-01
15:55
Update the built-in SQLite to the latest 3.27.0 alpha. check-in: 6ba52d79 user: drh tags: trunk
2019-01-29
14:29
Add a test command to infer the hash policy from the length of path names on reconstruct (disabled by preprocessor directive). Closed-Leaf check-in: 8d1ed47c user: florian tags: reconstruct-sha3
Changes
Hide Diffs Side-by-Side Diffs Ignore Whitespace Patch

Changes to src/rebuild.c.

   180    180   */
   181    181   static int totalSize;       /* Total number of artifacts to process */
   182    182   static int processCnt;      /* Number processed so far */
   183    183   static int ttyOutput;       /* Do progress output */
   184    184   static Bag bagDone;         /* Bag of records rebuilt */
   185    185   
   186    186   static char *zFNameFormat;  /* Format string for filenames on deconstruct */
          187  +static int cchFNamePrefix;  /* Length of directory prefix in zFNameFormat */
          188  +static char *zDestDir;      /* Destination directory on deconstruct */
   187    189   static int prefixLength;    /* Length of directory prefix for deconstruct */
          190  +static int fKeepRid1;       /* Flag to preserve RID=1 on de- and reconstruct */
   188    191   
   189    192   
   190    193   /*
   191    194   ** Draw the percent-complete message.
   192    195   ** The input is actually the permill complete.
   193    196   */
   194    197   static void percent_complete(int permill){
................................................................................
   272    275         manifest_crosslink(rid, pUse, MC_NONE);
   273    276       }else{
   274    277         /* We are doing "fossil deconstruct" */
   275    278         char *zUuid = db_text(0, "SELECT uuid FROM blob WHERE rid=%d", rid);
   276    279         char *zFile = mprintf(zFNameFormat /*works-like:"%s:%s"*/,
   277    280                               zUuid, zUuid+prefixLength);
   278    281         blob_write_to_file(pUse,zFile);
          282  +      if( rid==1 && fKeepRid1!=0 ){
          283  +        char *zFnDotRid1 = mprintf("%s/.rid1", zDestDir);
          284  +        char *zFnRid1 = zFile + cchFNamePrefix + 1; /* Skip directory slash */
          285  +        Blob bFileContents = empty_blob;
          286  +        blob_appendf(&bFileContents,
          287  +          "# The file holding the artifact with RID=1\n"
          288  +          "%s\n", zFnRid1);
          289  +        blob_write_to_file(&bFileContents, zFnDotRid1);
          290  +        blob_reset(&bFileContents);
          291  +        free(zFnDotRid1);
          292  +      }
   279    293         free(zFile);
   280    294         free(zUuid);
   281    295         blob_reset(pUse);
   282    296       }
   283    297       assert( blob_is_reset(pUse) );
   284    298       rebuild_step_done(rid);
   285    299   
................................................................................
   929    943   void recon_read_dir(char *zPath){
   930    944     DIR *d;
   931    945     struct dirent *pEntry;
   932    946     Blob aContent; /* content of the just read artifact */
   933    947     static int nFileRead = 0;
   934    948     void *zUnicodePath;
   935    949     char *zUtf8Name;
          950  +  static int recursionLevel = 0;  /* Bookkeeping about the recursion level */
          951  +  static char *zFnRid1 = 0;       /* The file holding the artifact with RID=1 */
          952  +  static int cchPathInitial = 0;  /* The length of zPath on first recursion */
   936    953   
          954  +  recursionLevel++;
          955  +  if( recursionLevel==1 ){
          956  +    cchPathInitial = strlen(zPath);
          957  +    if( fKeepRid1!=0 ){
          958  +      char *zFnDotRid1 = mprintf("%s/.rid1", zPath);
          959  +      Blob bFileContents;
          960  +      if( blob_read_from_file(&bFileContents, zFnDotRid1, ExtFILE)!=-1 ){
          961  +        Blob line, value;
          962  +        while( blob_line(&bFileContents, &line)>0 ){
          963  +          if( blob_token(&line, &value)==0 ) continue;  /* Empty line */
          964  +          if( blob_buffer(&value)[0]=='#' ) continue;   /* Comment */
          965  +          blob_trim(&value);
          966  +          zFnRid1 = mprintf("%s/%s", zPath, blob_str(&value));
          967  +          break;
          968  +        }
          969  +        blob_reset(&bFileContents);
          970  +        if( zFnRid1 ){
          971  +          if( blob_read_from_file(&aContent, zFnRid1, ExtFILE)==-1 ){
          972  +            fossil_fatal("some unknown error occurred while reading \"%s\"",
          973  +                         zFnRid1);
          974  +          }else{
          975  +            recon_set_hash_policy(0, zFnRid1);
          976  +            content_put(&aContent);
          977  +            recon_restore_hash_policy();
          978  +            blob_reset(&aContent);
          979  +            fossil_print("\r%d", ++nFileRead);
          980  +            fflush(stdout);
          981  +          }
          982  +        }else{
          983  +          fossil_fatal("an error occurred while reading or parsing \"%s\"",
          984  +                       zFnDotRid1);
          985  +        }
          986  +      }
          987  +      free(zFnDotRid1);
          988  +    }
          989  +  }
   937    990     zUnicodePath = fossil_utf8_to_path(zPath, 1);
   938    991     d = opendir(zUnicodePath);
   939    992     if( d ){
   940    993       while( (pEntry=readdir(d))!=0 ){
   941    994         Blob path;
   942    995         char *zSubpath;
   943    996   
................................................................................
   951   1004         if( (pEntry->d_type==DT_UNKNOWN || pEntry->d_type==DT_LNK)
   952   1005             ? (file_isdir(zSubpath, ExtFILE)==1) : (pEntry->d_type==DT_DIR) )
   953   1006   #else
   954   1007         if( file_isdir(zSubpath, ExtFILE)==1 )
   955   1008   #endif
   956   1009         {
   957   1010           recon_read_dir(zSubpath);
   958         -      }else{
         1011  +      }else if( fossil_strcmp(zSubpath, zFnRid1)!=0 ){
   959   1012           blob_init(&path, 0, 0);
   960   1013           blob_appendf(&path, "%s", zSubpath);
   961   1014           if( blob_read_from_file(&aContent, blob_str(&path), ExtFILE)==-1 ){
   962   1015             fossil_fatal("some unknown error occurred while reading \"%s\"",
   963   1016                          blob_str(&path));
   964   1017           }
         1018  +        recon_set_hash_policy(cchPathInitial, blob_str(&path));
   965   1019           content_put(&aContent);
         1020  +        recon_restore_hash_policy();
   966   1021           blob_reset(&path);
   967   1022           blob_reset(&aContent);
   968   1023           fossil_print("\r%d", ++nFileRead);
   969   1024           fflush(stdout);
   970   1025         }
   971   1026         free(zSubpath);
   972   1027       }
   973   1028       closedir(d);
   974   1029     }else {
   975   1030       fossil_fatal("encountered error %d while trying to open \"%s\".",
   976   1031                     errno, g.argv[3]);
   977   1032     }
   978   1033     fossil_path_free(zUnicodePath);
         1034  +  if( recursionLevel==1 && zFnRid1!=0 ) free(zFnRid1);
         1035  +  recursionLevel--;
         1036  +}
         1037  +
         1038  +/*
         1039  +** Helper functions called from recon_read_dir() to set and restore the correct
         1040  +** hash policy for an artifact read from disk, inferred from the length of the
         1041  +** path name.
         1042  +*/
         1043  +static int saved_eHashPolicy = -1;
         1044  +
         1045  +void recon_set_hash_policy(
         1046  +  const int cchPathPrefix,    /* Directory prefix length for zUuidAsFilePath */
         1047  +  const char *zUuidAsFilePath /* Relative, well-formed, from recon_read_dir() */
         1048  +){
         1049  +  int cchUuidAsFilePath;
         1050  +  const char *zHashPart;
         1051  +  int cchHashPart = 0;
         1052  +  int new_eHashPolicy = -1;
         1053  +  assert( HNAME_COUNT==2 ); /* Review function if new hashes are implemented. */
         1054  +  if( zUuidAsFilePath==0 ) return;
         1055  +  cchUuidAsFilePath = strlen(zUuidAsFilePath);
         1056  +  if( cchUuidAsFilePath==0 ) return;
         1057  +  if( cchPathPrefix>=cchUuidAsFilePath ) return;
         1058  +  for( zHashPart = zUuidAsFilePath + cchPathPrefix; *zHashPart; zHashPart++ ){
         1059  +    if( *zHashPart!='/' ) cchHashPart++;
         1060  +  }
         1061  +  if( cchHashPart>=HNAME_LEN_K256 ){
         1062  +    new_eHashPolicy = HPOLICY_SHA3;
         1063  +  }else if( cchHashPart>=HNAME_LEN_SHA1 ){
         1064  +    new_eHashPolicy = HPOLICY_SHA1;
         1065  +  }
         1066  +  if( new_eHashPolicy!=-1 ){
         1067  +    saved_eHashPolicy = g.eHashPolicy;
         1068  +    g.eHashPolicy = new_eHashPolicy;
         1069  +  }
         1070  +}
         1071  +
         1072  +void recon_restore_hash_policy(){
         1073  +  if( saved_eHashPolicy!=-1 ){
         1074  +    g.eHashPolicy = saved_eHashPolicy;
         1075  +    saved_eHashPolicy = -1;
         1076  +  }
         1077  +}
         1078  +
         1079  +#if 0
         1080  +/*
         1081  +** COMMAND: test-hash-from-path*
         1082  +**
         1083  +** Usage: %fossil test-hash-from-path ?OPTIONS? DESTINATION UUID
         1084  +**
         1085  +** Generate a sample path name from DESTINATION and UUID, as the `deconstruct'
         1086  +** command would do.  Then try to guess the hash policy from the path name, as
         1087  +** the `reconstruct' command would do.
         1088  +**
         1089  +** No files or directories will be created.
         1090  +**
         1091  +** Options:
         1092  +**   -L|--prefixlength N     Set the length of the names of the DESTINATION
         1093  +**                           subdirectories to N.
         1094  +*/
         1095  +void test_hash_from_path_cmd(void) {
         1096  +  char *zDest;
         1097  +  char *zUuid;
         1098  +  char *zFile;
         1099  +  const char *zHashPolicy = "unknown";
         1100  +  const char *zPrefixOpt = find_option("prefixlength","L",1);
         1101  +  int iPrefixLength;
         1102  +  if( !zPrefixOpt ){
         1103  +    iPrefixLength = 2;
         1104  +  }else{
         1105  +    iPrefixLength = atoi(zPrefixOpt);
         1106  +    if( iPrefixLength<0 || iPrefixLength>9 ){
         1107  +      fossil_fatal("N(%s) is not a valid prefix length!",zPrefixOpt);
         1108  +    }
         1109  +  }
         1110  +  if( g.argc!=4 ){
         1111  +    usage ("?OPTIONS? DESTINATION UUID");
         1112  +  }
         1113  +  zDest = g.argv[2];
         1114  +  zUuid = g.argv[3];
         1115  +  if( iPrefixLength ){
         1116  +    zFNameFormat = mprintf("%s/%%.%ds/%%s",zDest,iPrefixLength);
         1117  +  }else{
         1118  +    zFNameFormat = mprintf("%s/%%s",zDest);
         1119  +  }
         1120  +  cchFNamePrefix = strlen(zDest);
         1121  +  zFile = mprintf(zFNameFormat /*works-like:"%s:%s"*/,
         1122  +                  zUuid, zUuid+iPrefixLength);
         1123  +  recon_set_hash_policy(cchFNamePrefix,zFile);
         1124  +  if( saved_eHashPolicy!=-1 ){
         1125  +    zHashPolicy = hpolicy_name();
         1126  +  }
         1127  +  recon_restore_hash_policy();
         1128  +  fossil_print(
         1129  +    "\nPath Name:   %s"
         1130  +    "\nHash Policy: %s\n",
         1131  +    zFile,zHashPolicy);
         1132  +  free(zFile);
         1133  +  free(zFNameFormat);
         1134  +  zFNameFormat = 0;
         1135  +  cchFNamePrefix = 0;
   979   1136   }
         1137  +#endif
   980   1138   
   981   1139   /*
   982   1140   ** COMMAND: reconstruct*
   983   1141   **
   984         -** Usage: %fossil reconstruct FILENAME DIRECTORY
         1142  +** Usage: %fossil reconstruct ?OPTIONS? FILENAME DIRECTORY
   985   1143   **
   986   1144   ** This command studies the artifacts (files) in DIRECTORY and
   987   1145   ** reconstructs the fossil record from them. It places the new
   988   1146   ** fossil repository in FILENAME. Subdirectories are read, files
   989   1147   ** with leading '.' in the filename are ignored.
   990   1148   **
         1149  +** Options:
         1150  +**    -K|--keep-rid1    Read the filename of the artifact with
         1151  +**                      RID=1 from the file .rid1 in DIRECTORY.
         1152  +**
   991   1153   ** See also: deconstruct, rebuild
   992   1154   */
   993   1155   void reconstruct_cmd(void) {
   994   1156     char *zPassword;
         1157  +  fKeepRid1 = find_option("keep-rid1","K",0)!=0;
   995   1158     if( g.argc!=4 ){
   996   1159       usage("FILENAME DIRECTORY");
   997   1160     }
   998   1161     if( file_isdir(g.argv[3], ExtFILE)!=1 ){
   999   1162       fossil_print("\"%s\" is not a directory\n\n", g.argv[3]);
  1000   1163       usage("FILENAME DIRECTORY");
  1001   1164     }
................................................................................
  1039   1202   ** writes all artifacts to the file system. The DESTINATION directory
  1040   1203   ** will be populated with subdirectories AA and files AA/BBBBBBBBB.., where
  1041   1204   ** AABBBBBBBBB.. is the 40+ character artifact ID, AA the first 2 characters.
  1042   1205   ** If -L|--prefixlength is given, the length (default 2) of the directory
  1043   1206   ** prefix can be set to 0,1,..,9 characters.
  1044   1207   **
  1045   1208   ** Options:
  1046         -**   -R|--repository REPOSITORY  deconstruct given REPOSITORY
  1047         -**   -L|--prefixlength N         set the length of the names of the DESTINATION
  1048         -**                               subdirectories to N
         1209  +**   -R|--repository REPOSITORY  Deconstruct given REPOSITORY.
         1210  +**   -K|--keep-rid1              Save the filename of the artifact with RID=1 to
         1211  +**                               the file .rid1 in the DESTINATION directory.
         1212  +**   -L|--prefixlength N         Set the length of the names of the DESTINATION
         1213  +**                               subdirectories to N.
  1049   1214   **   --private                   Include private artifacts.
  1050   1215   **
  1051   1216   ** See also: rebuild, reconstruct
  1052   1217   */
  1053   1218   void deconstruct_cmd(void){
  1054         -  const char *zDestDir;
  1055   1219     const char *zPrefixOpt;
  1056   1220     Stmt        s;
  1057   1221     int privateFlag;
  1058   1222   
         1223  +  fKeepRid1 = find_option("keep-rid1","K",0)!=0;
  1059   1224     /* get and check prefix length argument and build format string */
  1060   1225     zPrefixOpt=find_option("prefixlength","L",1);
  1061   1226     if( !zPrefixOpt ){
  1062   1227       prefixLength = 2;
  1063   1228     }else{
  1064   1229       if( zPrefixOpt[0]>='0' && zPrefixOpt[0]<='9' && !zPrefixOpt[1] ){
  1065   1230         prefixLength = (int)(*zPrefixOpt-'0');
................................................................................
  1090   1255     */
  1091   1256   #endif
  1092   1257     if( prefixLength ){
  1093   1258       zFNameFormat = mprintf("%s/%%.%ds/%%s",zDestDir,prefixLength);
  1094   1259     }else{
  1095   1260       zFNameFormat = mprintf("%s/%%s",zDestDir);
  1096   1261     }
         1262  +  cchFNamePrefix = strlen(zDestDir);
  1097   1263   
  1098   1264     bag_init(&bagDone);
  1099   1265     ttyOutput = 1;
  1100   1266     processCnt = 0;
  1101   1267     if (!g.fQuiet) {
  1102   1268       fossil_print("0 (0%%)...\r");
  1103   1269       fflush(stdout);